From 2b41009805cb40d7981dfa25bfed3db91faf9310 Mon Sep 17 00:00:00 2001 From: Alexander Morozov Date: Tue, 1 Sep 2015 09:32:29 -0700 Subject: [PATCH] Adjust runc to new opencontainers/specs version I deleted possibility to specify config file from commands for now. Until we decide how it'll be done. Also I changed runc spec interface to write config files instead of output them. Signed-off-by: Alexander Morozov --- Godeps/Godeps.json | 2 +- .../opencontainers/specs/MAINTAINERS | 8 + .../github.com/opencontainers/specs/README.md | 33 +- .../github.com/opencontainers/specs/bundle.md | 20 +- .../opencontainers/specs/config-linux.md | 212 +------ .../specs/{spec.go => config.go} | 36 +- .../github.com/opencontainers/specs/config.md | 61 +-- .../opencontainers/specs/config_linux.go | 29 + .../specs/runtime-config-linux.md | 195 +++++++ .../opencontainers/specs/runtime-config.md | 54 ++ .../opencontainers/specs/runtime-linux.md | 6 + .../opencontainers/specs/runtime_config.go | 36 ++ ...{spec_linux.go => runtime_config_linux.go} | 40 +- README.md | 518 ++++++++++-------- restore.go | 4 +- spec.go | 155 ++++-- start.go | 15 +- 17 files changed, 808 insertions(+), 616 deletions(-) create mode 100644 Godeps/_workspace/src/github.com/opencontainers/specs/MAINTAINERS rename Godeps/_workspace/src/github.com/opencontainers/specs/{spec.go => config.go} (61%) create mode 100644 Godeps/_workspace/src/github.com/opencontainers/specs/config_linux.go create mode 100644 Godeps/_workspace/src/github.com/opencontainers/specs/runtime-config-linux.md create mode 100644 Godeps/_workspace/src/github.com/opencontainers/specs/runtime-config.md create mode 100644 Godeps/_workspace/src/github.com/opencontainers/specs/runtime-linux.md create mode 100644 Godeps/_workspace/src/github.com/opencontainers/specs/runtime_config.go rename Godeps/_workspace/src/github.com/opencontainers/specs/{spec_linux.go => runtime_config_linux.go} (88%) diff --git a/Godeps/Godeps.json b/Godeps/Godeps.json index 
9d815df2960..119c2c1727d 100644 --- a/Godeps/Godeps.json +++ b/Godeps/Godeps.json @@ -53,7 +53,7 @@ }, { "ImportPath": "github.com/opencontainers/specs", - "Rev": "da9240a7125f601aef46f66ea615177607b00d39" + "Rev": "138deee1418d2376a4e9d4966e9f1543e717a641" }, { "ImportPath": "github.com/seccomp/libseccomp-golang", diff --git a/Godeps/_workspace/src/github.com/opencontainers/specs/MAINTAINERS b/Godeps/_workspace/src/github.com/opencontainers/specs/MAINTAINERS new file mode 100644 index 00000000000..906510b6001 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/specs/MAINTAINERS @@ -0,0 +1,8 @@ +Michael Crosby (@crosbymichael) +Alexander Morozov (@LK4D4) +Rohit Jnagal (@rjnagal) +Victor Marmol (@vmarmol) +Mrunal Patel (@mrunalp) +Vincent Batts (@vbatts) +Daniel, Dao Quang Minh (@dqminh) +Brandon Philips (@philips) diff --git a/Godeps/_workspace/src/github.com/opencontainers/specs/README.md b/Godeps/_workspace/src/github.com/opencontainers/specs/README.md index f85d5fff56e..a98dc4acdaa 100644 --- a/Godeps/_workspace/src/github.com/opencontainers/specs/README.md +++ b/Godeps/_workspace/src/github.com/opencontainers/specs/README.md @@ -54,21 +54,32 @@ With Standard Containers we can put an end to that embarrassment, by making INDU # Contributing -Development happens on github for the spec. Issues are used for bugs and actionable items and longer -discussions can happen on the mailing list. You can subscribe and join the mailing list on -[google groups](https://groups.google.com/a/opencontainers.org/forum/#!forum/dev). +Development happens on GitHub for the spec. +Issues are used for bugs and actionable items and longer discussions can happen on the [mailing list](#mailing-list). -The specification and code is licensed under the Apache 2.0 license found in -the `LICENSE` file of this repository. +The specification and code is licensed under the Apache 2.0 license found in the `LICENSE` file of this repository. 
+ +## Discuss your design + +The project welcomes submissions, but please let everyone know what you are working on. + +Before undertaking a nontrivial change to this specification, send mail to the [mailing list](#mailing-list) to discuss what you plan to do. +This gives everyone a chance to validate the design, helps prevent duplication of effort, and ensures that the idea fits. +It also guarantees that the design is sound before code is written; a GitHub pull-request is not the place for high-level discussions. + +Typos and grammatical errors can go straight to a pull-request. +When in doubt, start on the [mailing-list](#mailing-list). ## Weekly Call The contributors and maintainers of the project have a weekly meeting Wednesdays at 10:00 AM PST. -The link to the call will be posted on the mailing list each week along with set topics for discussion. -Everyone is welcome to participate in the call, although there can only be speaking members on the Google Hangout. -Participants who don't get a speaking slot can watch the live broadcast on [this YouTube channel][youtube] and post feedback and questions on [the IRC channel](#irc). -Everyone is welcome to propose additional topics, suggest other agenda alterations, or request a speaking slot via the mailing list. -Minutes for the call will be posted to the mailing list for those who are unable to join the call. +Everyone is welcome to participate in the [BlueJeans call][BlueJeans]. +An initial agenda will be posted to the [mailing list](#mailing-list) earlier in the week, and everyone is welcome to propose additional topics or suggest other agenda alterations there. +Minutes for the call will be posted to the [mailing list](#mailing-list) for those who are unable to join the call. + +## Mailing List + +You can subscribe and join the mailing list on [Google Groups](https://groups.google.com/a/opencontainers.org/forum/#!forum/dev). 
## IRC @@ -135,4 +146,4 @@ using your real name (sorry, no pseudonyms or anonymous contributions.) You can add the sign off when creating the git commit via `git commit -s`. -[youtube]: https://www.youtube.com/channel/UC1wmLdEYmwWcsFg7bt1s5nw +[BlueJeans]: https://bluejeans.com/1771332256/ diff --git a/Godeps/_workspace/src/github.com/opencontainers/specs/bundle.md b/Godeps/_workspace/src/github.com/opencontainers/specs/bundle.md index ff6906f029f..58f4146f640 100644 --- a/Godeps/_workspace/src/github.com/opencontainers/specs/bundle.md +++ b/Godeps/_workspace/src/github.com/opencontainers/specs/bundle.md @@ -12,19 +12,19 @@ A standard container bundle is made of the following 3 parts: # Directory layout -A Standard Container bundle is a directory containing all the content needed to load and run a container. This includes its configuration file (`config.json`) and content directories. The main property of this directory layout is that it can be moved as a unit to another machine and run the same container. +A Standard Container bundle is a directory containing all the content needed to load and run a container. +This includes two configuration files `config.json` and `runtime.json`, and a rootfs directory. +The `config.json` file contains settings that are host independent and application specific such as security permissions, environment variables and arguments. +The `runtime.json` file contains settings that are host specific such as memory limits, local device access and mount points. +The goal is that the bundle can be moved as a unit to another machine and run the same application if `runtime.json` is removed or reconfigured. The syntax and semantics for `config.json` are described in [this specification](config.md). -One or more *content directories* may be adjacent to the configuration file. 
This must include at least the root filesystem (referenced in the configuration file by the *root* field) and may include other related content (signatures, other configs, etc.). The interpretation of these resources is specified in the configuration. The names of the directories may be arbitrary, but users should consider using conventional names as in the example below. +A single `rootfs` directory MUST be in the same directory as the `config.json`. +The names of the directories may be arbitrary, but users should consider using conventional names as in the example below. ``` -/ -! ---- config.json -! ---- rootfs -! ---- signatures +config.json +runtime.json +rootfs/ ``` - diff --git a/Godeps/_workspace/src/github.com/opencontainers/specs/config-linux.md b/Godeps/_workspace/src/github.com/opencontainers/specs/config-linux.md index d6ff683170b..aab97696773 100644 --- a/Godeps/_workspace/src/github.com/opencontainers/specs/config-linux.md +++ b/Godeps/_workspace/src/github.com/opencontainers/specs/config-linux.md @@ -5,142 +5,7 @@ cgroups, capabilities, LSM, and file system jails to fulfill the spec. Additional information is needed for Linux over the [default spec configuration](config.md) in order to configure these various kernel features. -## Linux namespaces - -A namespace wraps a global system resource in an abstraction that makes it -appear to the processes within the namespace that they have their own isolated -instance of the global resource. Changes to the global resource are visible to -other processes that are members of the namespace, but are invisible to other -processes. For more information, see [the man page](http://man7.org/linux/man-pages/man7/namespaces.7.html) - -Namespaces are specified in the spec as an array of entries. Each entry has a -type field with possible values described below and an optional path element. -If a path is specified, that particular file is used to join that type of namespace. 
- -```json - "namespaces": [ - { - "type": "pid", - "path": "/proc/1234/ns/pid" - }, - { - "type": "net", - "path": "/var/run/netns/neta" - }, - { - "type": "mnt", - }, - { - "type": "ipc", - }, - { - "type": "uts", - }, - { - "type": "user", - }, - ] -``` - -#### Namespace types - -* **pid** processes inside the container will only be able to see other processes inside the same container. -* **network** the container will have it's own network stack. -* **mnt** the container will have an isolated mount table. -* **ipc** processes inside the container will only be able to communicate to other processes inside the same -container via system level IPC. -* **uts** the container will be able to have it's own hostname and domain name. -* **user** the container will be able to remap user and group IDs from the host to local users and groups -within the container. - -### Access to devices - -Devices is an array specifying the list of devices to be created in the container. -Next parameters can be specified: - -* type - type of device: 'c', 'b', 'u' or 'p'. More info in `man mknod` -* path - full path to device inside container -* major, minor - major, minor numbers for device. More info in `man mknod`. - There is special value: `-1`, which means `*` for `device` - cgroup setup. -* permissions - cgroup permissions for device. A composition of 'r' - (read), 'w' (write), and 'm' (mknod). 
-* fileMode - file mode for device file -* uid - uid of device owner -* gid - gid of device owner - -```json - "devices": [ - { - "path": "/dev/random", - "type": "c", - "major": 1, - "minor": 8, - "permissions": "rwm", - "fileMode": 0666, - "uid": 0, - "gid": 0 - }, - { - "path": "/dev/urandom", - "type": "c", - "major": 1, - "minor": 9, - "permissions": "rwm", - "fileMode": 0666, - "uid": 0, - "gid": 0 - }, - { - "path": "/dev/null", - "type": "c", - "major": 1, - "minor": 3, - "permissions": "rwm", - "fileMode": 0666, - "uid": 0, - "gid": 0 - }, - { - "path": "/dev/zero", - "type": "c", - "major": 1, - "minor": 5, - "permissions": "rwm", - "fileMode": 0666, - "uid": 0, - "gid": 0 - }, - { - "path": "/dev/tty", - "type": "c", - "major": 5, - "minor": 0, - "permissions": "rwm", - "fileMode": 0666, - "uid": 0, - "gid": 0 - }, - { - "path": "/dev/full", - "type": "c", - "major": 1, - "minor": 7, - "permissions": "rwm", - "fileMode": 0666, - "uid": 0, - "gid": 0 - } - ] -``` - -## Linux control groups - -Also known as cgroups, they are used to restrict resource usage for a container and handle -device access. cgroups provide controls to restrict cpu, memory, IO, and network for -the container. For more information, see the [kernel cgroups documentation](https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt) - -## Linux capabilities +## Capabilities Capabilities is an array that specifies Linux capabilities that can be provided to the process inside the container. Valid values are the string after `CAP_` for capabilities defined @@ -154,33 +19,15 @@ in [the man page](http://man7.org/linux/man-pages/man7/capabilities.7.html) ] ``` -## Linux sysctl - -sysctl allows kernel parameters to be modified at runtime for the container. 
-For more information, see [the man page](http://man7.org/linux/man-pages/man8/sysctl.8.html) - -```json - "sysctl": { - "net.ipv4.ip_forward": "1", - "net.core.somaxconn": "256" - } -``` +## Rootfs Mount Propagation -## Linux rlimits +rootfsPropagation sets the rootfs's mount propagation. Its value is either slave, private, or shared. [The kernel doc](https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt) has more information about mount propagation. ```json - "rlimits": [ - { - "type": "RLIMIT_NPROC", - "soft": 1024, - "hard": 102400 - } - ] + "rootfsPropagation": "slave", ``` -rlimits allow setting resource limits. The type is from the values defined in [the man page](http://man7.org/linux/man-pages/man2/setrlimit.2.html). The kernel enforces the soft limit for a resource while the hard limit acts as a ceiling for that value that could be set by an unprivileged process. - -## Linux user namespace mappings +## User namespace mappings ```json "uidMappings": [ @@ -199,48 +46,7 @@ rlimits allow setting resource limits. The type is from the values defined in [t ] ``` -uid/gid mappings describe the user namespace mappings from the host to the container. *hostID* is the starting uid/gid on the host to be mapped to *containerID* which is the starting uid/gid in the container and *size* refers to the number of ids to be mapped. The Linux kernel has a limit of 5 such mappings that can be specified. - -## Rootfs Mount Propagation -rootfsPropagation sets the rootfs's mount propagation. Its value is either slave, private, or shared. [The kernel doc](https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt) has more information about mount propagation. - -```json - "rootfsPropagation": "slave", -``` - -## Selinux process label - -Selinux process label specifies the label with which the processes in a container are run. 
-For more information about SELinux, see [Selinux documentation](http://selinuxproject.org/page/Main_Page) -```json - "selinuxProcessLabel": "system_u:system_r:svirt_lxc_net_t:s0:c124,c675" -``` - -## Apparmor profile - -Apparmor profile specifies the name of the apparmor profile that will be used for the container. -For more information about Apparmor, see [Apparmor documentation](https://wiki.ubuntu.com/AppArmor) - -```json - "apparmorProfile": "acme_secure_profile" -``` - -## Seccomp - -Seccomp provides application sandboxing mechanism in the Linux kernel. -Seccomp configuration allows one to configure actions to take for matched syscalls and furthermore also allows -matching on values passed as arguments to syscalls. -For more information about Seccomp, see [Seccomp kernel documentation](https://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt) -The actions and operators are strings that match the definitions in seccomp.h from [libseccomp](https://github.com/seccomp/libseccomp) and are translated to corresponding values. - -```json - "seccomp": { - "defaultAction": "SCMP_ACT_ALLOW", - "syscalls": [ - { - "name": "getcwd", - "action": "SCMP_ACT_ERRNO" - } - ] - } -``` +uid/gid mappings describe the user namespace mappings from the host to the container. +The mappings represent how the bundle `rootfs` expects the user namespace to be setup and the runtime SHOULD NOT modify the permissions on the rootfs to realize the mapping. +*hostID* is the starting uid/gid on the host to be mapped to *containerID* which is the starting uid/gid in the container and *size* refers to the number of ids to be mapped. +There is a limit of 5 mappings which is the Linux kernel hard limit. 
diff --git a/Godeps/_workspace/src/github.com/opencontainers/specs/spec.go b/Godeps/_workspace/src/github.com/opencontainers/specs/config.go similarity index 61% rename from Godeps/_workspace/src/github.com/opencontainers/specs/spec.go rename to Godeps/_workspace/src/github.com/opencontainers/specs/config.go index 2100cca39c5..6d292773504 100644 --- a/Godeps/_workspace/src/github.com/opencontainers/specs/spec.go +++ b/Godeps/_workspace/src/github.com/opencontainers/specs/config.go @@ -14,30 +14,7 @@ type Spec struct { // Hostname is the container's host name. Hostname string `json:"hostname"` // Mounts profile configuration for adding mounts to the container's filesystem. - Mounts []Mount `json:"mounts"` - // Hooks are the commands run at various lifecycle events of the container. - Hooks Hooks `json:"hooks"` -} - -type Hooks struct { - // Prestart is a list of hooks to be run before the container process is executed. - // On Linux, they are run after the container namespaces are created. - Prestart []Hook `json:"prestart"` - // Poststop is a list of hooks to be run after the container process exits. - Poststop []Hook `json:"poststop"` -} - -// Mount specifies a mount for a container. -type Mount struct { - // Type specifies the mount kind. - Type string `json:"type"` - // Source specifies the source path of the mount. In the case of bind mounts on - // linux based systems this would be the file on the host. - Source string `json:"source"` - // Destination is the path where the mount will be placed relative to the container's root. - Destination string `json:"destination"` - // Options are fstab style mount options. - Options string `json:"options"` + MountPoints []MountPoint `json:"mounts"` } // Process contains information to start a specific application inside the container. @@ -72,9 +49,10 @@ type Platform struct { Arch string `json:"arch"` } -// Hook specifies a command that is run at a particular event in the lifecycle of a container. 
-type Hook struct { - Path string `json:"path"` - Args []string `json:"args"` - Env []string `json:"env"` +// MountPoint describes a directory that may be fulfilled by a mount in the runtime.json. +type MountPoint struct { + // Name is a unique descriptive identifier for this mount point. + Name string `json:"name"` + // Path specifies the path of the mount. The path and child directories MUST exist, a runtime MUST NOT create directories automatically to a mount point. + Path string `json:"path"` } diff --git a/Godeps/_workspace/src/github.com/opencontainers/specs/config.md b/Godeps/_workspace/src/github.com/opencontainers/specs/config.md index f0adcb44e56..00f24aa6fce 100644 --- a/Godeps/_workspace/src/github.com/opencontainers/specs/config.md +++ b/Godeps/_workspace/src/github.com/opencontainers/specs/config.md @@ -1,6 +1,6 @@ # Configuration file -The container’s top-level directory MUST contain a configuration file called `config.json`. +The container's top-level directory MUST contain a configuration file called `config.json`. For now the canonical schema is defined in [spec.go](spec.go) and [spec_linux.go](spec_linux.go), but this will be moved to a formal JSON schema over time. The configuration file contains metadata necessary to implement standard operations against the container. @@ -34,61 +34,6 @@ Each container has exactly one *root filesystem*, specified in the *root* object } ``` -## Mount Configuration - -Additional filesystems can be declared as "mounts", specified in the *mounts* array. The parameters are similar to the ones in Linux mount system call. [http://linux.die.net/man/2/mount](http://linux.die.net/man/2/mount) - -* **type** (string, required) Linux, *filesystemtype* argument supported by the kernel are listed in */proc/filesystems* (e.g., "minix", "ext2", "ext3", "jfs", "xfs", "reiserfs", "msdos", "proc", "nfs", "iso9660"). Windows: ntfs -* **source** (string, required) a device name, but can also be a directory name or a dummy. 
Windows, the volume name that is the target of the mount point. \\?\Volume\{GUID}\ (on Windows source is called target) -* **destination** (string, required) where the source filesystem is mounted relative to the container rootfs. -* **options** (string, optional) in the fstab format [https://wiki.archlinux.org/index.php/Fstab](https://wiki.archlinux.org/index.php/Fstab). - -*Example (Linux)* - -```json -"mounts": [ - { - "type": "proc", - "source": "proc", - "destination": "/proc", - "options": "" - }, - { - "type": "tmpfs", - "source": "tmpfs", - "destination": "/dev", - "options": "nosuid,strictatime,mode=755,size=65536k" - }, - { - "type": "devpts", - "source": "devpts", - "destination": "/dev/pts", - "options": "nosuid,noexec,newinstance,ptmxmode=0666,mode=0620,gid=5" - }, - { - "type": "bind", - "source": "/volumes/testing", - "destination": "/data", - "options": "rbind,rw" - } -] -``` - -*Example (Windows)* - -```json -"mounts": [ - { - "type": "ntfs", - "source": "\\\\?\\Volume\\{2eca078d-5cbc-43d3-aff8-7e8511f60d0e}\\", - "destination": "C:\\Users\\crosbymichael\\My Fancy Mount Point\\", - "options": "" - } -] -``` - -See links for details about [mountvol](http://ss64.com/nt/mountvol.html) and [SetVolumeMountPoint](https://msdn.microsoft.com/en-us/library/windows/desktop/aa365561(v=vs.85).aspx) in Windows. - ## Process configuration * **terminal** (bool, optional) specifies whether you want a terminal attached to that process. Defaults to false. 
@@ -111,13 +56,13 @@ For Linux-based systems the user structure has the following fields: "user": { "uid": 1, "gid": 1, - "additionalGids": [] + "additionalGids": [5, 6] }, "env": [ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", "TERM=xterm" ], - "cwd": "", + "cwd": "/root", "args": [ "sh" ] diff --git a/Godeps/_workspace/src/github.com/opencontainers/specs/config_linux.go b/Godeps/_workspace/src/github.com/opencontainers/specs/config_linux.go new file mode 100644 index 00000000000..4bfb78325de --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/specs/config_linux.go @@ -0,0 +1,29 @@ +// +build linux + +package specs + +// LinuxSpec is the full specification for linux containers. +type LinuxSpec struct { + Spec + // Linux is platform specific configuration for linux based containers. + Linux Linux `json:"linux"` +} + +// Linux contains platform specific configuration for linux based containers. +type Linux struct { + // Capabilities are linux capabilities that are kept for the container. + Capabilities []string `json:"capabilities"` + // RootfsPropagation is the rootfs mount propagation mode for the container. + RootfsPropagation string `json:"rootfsPropagation"` +} + +// User specifies linux specific user and group information for the container's +// main process. +type User struct { + // UID is the user id. + UID int32 `json:"uid"` + // GID is the group id. + GID int32 `json:"gid"` + // AdditionalGids are additional group ids set for the container's process. 
+ AdditionalGids []int32 `json:"additionalGids"` +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/specs/runtime-config-linux.md b/Godeps/_workspace/src/github.com/opencontainers/specs/runtime-config-linux.md new file mode 100644 index 00000000000..1cabc6156ea --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/specs/runtime-config-linux.md @@ -0,0 +1,195 @@ +## Namespaces + +A namespace wraps a global system resource in an abstraction that makes it appear to the processes within the namespace that they have their own isolated instance of the global resource. +Changes to the global resource are visible to other processes that are members of the namespace, but are invisible to other processes. +For more information, see [the man page](http://man7.org/linux/man-pages/man7/namespaces.7.html). + +Namespaces are specified in the spec as an array of entries. +Each entry has a type field with possible values described below and an optional path element. +If a path is specified, that particular file is used to join that type of namespace. + +```json + "namespaces": [ + { + "type": "pid", + "path": "/proc/1234/ns/pid" + }, + { + "type": "net", + "path": "/var/run/netns/neta" + }, + { + "type": "mnt", + }, + { + "type": "ipc", + }, + { + "type": "uts", + }, + { + "type": "user", + }, + ] +``` + +#### Namespace types + +* **pid** processes inside the container will only be able to see other processes inside the same container. +* **network** the container will have its own network stack. +* **mnt** the container will have an isolated mount table. +* **ipc** processes inside the container will only be able to communicate to other processes inside the same +container via system level IPC. +* **uts** the container will be able to have its own hostname and domain name. +* **user** the container will be able to remap user and group IDs from the host to local users and groups +within the container. 
+ +### Access to devices + +Devices is an array specifying the list of devices to be created in the container. +Next parameters can be specified: + +* type - type of device: 'c', 'b', 'u' or 'p'. More info in `man mknod` +* path - full path to device inside container +* major, minor - major, minor numbers for device. More info in `man mknod`. + There is special value: `-1`, which means `*` for `device` + cgroup setup. +* permissions - cgroup permissions for device. A composition of 'r' + (read), 'w' (write), and 'm' (mknod). +* fileMode - file mode for device file +* uid - uid of device owner +* gid - gid of device owner + +```json + "devices": [ + { + "path": "/dev/random", + "type": "c", + "major": 1, + "minor": 8, + "permissions": "rwm", + "fileMode": 0666, + "uid": 0, + "gid": 0 + }, + { + "path": "/dev/urandom", + "type": "c", + "major": 1, + "minor": 9, + "permissions": "rwm", + "fileMode": 0666, + "uid": 0, + "gid": 0 + }, + { + "path": "/dev/null", + "type": "c", + "major": 1, + "minor": 3, + "permissions": "rwm", + "fileMode": 0666, + "uid": 0, + "gid": 0 + }, + { + "path": "/dev/zero", + "type": "c", + "major": 1, + "minor": 5, + "permissions": "rwm", + "fileMode": 0666, + "uid": 0, + "gid": 0 + }, + { + "path": "/dev/tty", + "type": "c", + "major": 5, + "minor": 0, + "permissions": "rwm", + "fileMode": 0666, + "uid": 0, + "gid": 0 + }, + { + "path": "/dev/full", + "type": "c", + "major": 1, + "minor": 7, + "permissions": "rwm", + "fileMode": 0666, + "uid": 0, + "gid": 0 + } + ] +``` + +## Control groups + +Also known as cgroups, they are used to restrict resource usage for a container and handle +device access. cgroups provide controls to restrict cpu, memory, IO, and network for +the container. For more information, see the [kernel cgroups documentation](https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt). + +## Sysctl + +sysctl allows kernel parameters to be modified at runtime for the container. 
+For more information, see [the man page](http://man7.org/linux/man-pages/man8/sysctl.8.html) + +```json + "sysctl": { + "net.ipv4.ip_forward": "1", + "net.core.somaxconn": "256" + } +``` + +## Rlimits + +```json + "rlimits": [ + { + "type": "RLIMIT_NPROC", + "soft": 1024, + "hard": 102400 + } + ] +``` + +rlimits allow setting resource limits. The type is from the values defined in [the man page](http://man7.org/linux/man-pages/man2/setrlimit.2.html). The kernel enforces the soft limit for a resource while the hard limit acts as a ceiling for that value that could be set by an unprivileged process. + +## SELinux process label + +SELinux process label specifies the label with which the processes in a container are run. +For more information about SELinux, see [Selinux documentation](http://selinuxproject.org/page/Main_Page) +```json + "selinuxProcessLabel": "system_u:system_r:svirt_lxc_net_t:s0:c124,c675" +``` + +## Apparmor profile + +Apparmor profile specifies the name of the apparmor profile that will be used for the container. +For more information about Apparmor, see [Apparmor documentation](https://wiki.ubuntu.com/AppArmor) + +```json + "apparmorProfile": "acme_secure_profile" +``` + +## seccomp + +Seccomp provides application sandboxing mechanism in the Linux kernel. +Seccomp configuration allows one to configure actions to take for matched syscalls and furthermore also allows +matching on values passed as arguments to syscalls. +For more information about Seccomp, see [Seccomp kernel documentation](https://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt) +The actions and operators are strings that match the definitions in seccomp.h from [libseccomp](https://github.com/seccomp/libseccomp) and are translated to corresponding values. 
+ +```json + "seccomp": { + "defaultAction": "SCMP_ACT_ALLOW", + "syscalls": [ + { + "name": "getcwd", + "action": "SCMP_ACT_ERRNO" + } + ] + } +``` diff --git a/Godeps/_workspace/src/github.com/opencontainers/specs/runtime-config.md b/Godeps/_workspace/src/github.com/opencontainers/specs/runtime-config.md new file mode 100644 index 00000000000..6074e98bf9d --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/specs/runtime-config.md @@ -0,0 +1,54 @@ +## Mount Configuration + +Additional filesystems can be declared as "mounts", specified in the *mounts* array. The parameters are similar to the ones in Linux mount system call. [http://linux.die.net/man/2/mount](http://linux.die.net/man/2/mount) + +* **type** (string, required) Linux, *filesystemtype* argument supported by the kernel are listed in */proc/filesystems* (e.g., "minix", "ext2", "ext3", "jfs", "xfs", "reiserfs", "msdos", "proc", "nfs", "iso9660"). Windows: ntfs +* **source** (string, required) a device name, but can also be a directory name or a dummy. Windows, the volume name that is the target of the mount point. \\?\Volume\{GUID}\ (on Windows source is called target) +* **destination** (string, required) where the source filesystem is mounted relative to the container rootfs. +* **options** (list of strings, optional) in the fstab format [https://wiki.archlinux.org/index.php/Fstab](https://wiki.archlinux.org/index.php/Fstab). 
+ +*Example (Linux)* + +```json +"mounts": [ + { + "type": "proc", + "source": "proc", + "destination": "/proc", + "options": [] + }, + { + "type": "tmpfs", + "source": "tmpfs", + "destination": "/dev", + "options": ["nosuid","strictatime","mode=755","size=65536k"] + }, + { + "type": "devpts", + "source": "devpts", + "destination": "/dev/pts", + "options": ["nosuid","noexec","newinstance","ptmxmode=0666","mode=0620","gid=5"] + }, + { + "type": "bind", + "source": "/volumes/testing", + "destination": "/data", + "options": ["rbind","rw"] + } +] +``` + +*Example (Windows)* + +```json +"mounts": [ + { + "type": "ntfs", + "source": "\\\\?\\Volume\\{2eca078d-5cbc-43d3-aff8-7e8511f60d0e}\\", + "destination": "C:\\Users\\crosbymichael\\My Fancy Mount Point\\", + "options": [] + } +] +``` + +See links for details about [mountvol](http://ss64.com/nt/mountvol.html) and [SetVolumeMountPoint](https://msdn.microsoft.com/en-us/library/windows/desktop/aa365561(v=vs.85).aspx) in Windows. diff --git a/Godeps/_workspace/src/github.com/opencontainers/specs/runtime-linux.md b/Godeps/_workspace/src/github.com/opencontainers/specs/runtime-linux.md new file mode 100644 index 00000000000..dcfa24eb4da --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/specs/runtime-linux.md @@ -0,0 +1,6 @@ +## File descriptors +By default, only the `stdin`, `stdout` and `stderr` file descriptors are kept open for the application by the runtime. + +The runtime may pass additional file descriptors to the application to support features such as [socket activation](http://0pointer.de/blog/projects/socket-activated-containers.html). + +Some of the file descriptors may be redirected to `/dev/null` even though they are open. 
diff --git a/Godeps/_workspace/src/github.com/opencontainers/specs/runtime_config.go b/Godeps/_workspace/src/github.com/opencontainers/specs/runtime_config.go new file mode 100644 index 00000000000..9a08ab96d4d --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/specs/runtime_config.go @@ -0,0 +1,36 @@ +package specs + +type RuntimeSpec struct { + // Mounts profile configuration for adding mounts to the container's filesystem. + Mounts []Mount `json:"mounts"` + // Hooks are the commands run at various lifecycle events of the container. + Hooks Hooks `json:"hooks"` +} + +// Hook specifies a command that is run at a particular event in the lifecycle of a container. +type Hook struct { + Path string `json:"path"` + Args []string `json:"args"` + Env []string `json:"env"` +} + +type Hooks struct { + // Prestart is a list of hooks to be run before the container process is executed. + // On Linux, they are run after the container namespaces are created. + Prestart []Hook `json:"prestart"` + // Poststop is a list of hooks to be run after the container process exits. + Poststop []Hook `json:"poststop"` +} + +// Mount specifies a mount for a container. +type Mount struct { + // Type specifies the mount kind. + Type string `json:"type"` + // Source specifies the source path of the mount. In the case of bind mounts on + // linux based systems this would be the file on the host. + Source string `json:"source"` + // Destination is the path where the mount will be placed relative to the container's root. + Destination string `json:"destination"` + // Options are fstab style mount options. 
+ Options []string `json:"options"` +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/specs/spec_linux.go b/Godeps/_workspace/src/github.com/opencontainers/specs/runtime_config_linux.go similarity index 88% rename from Godeps/_workspace/src/github.com/opencontainers/specs/spec_linux.go rename to Godeps/_workspace/src/github.com/opencontainers/specs/runtime_config_linux.go index 9319c99ac29..24092237e79 100644 --- a/Godeps/_workspace/src/github.com/opencontainers/specs/spec_linux.go +++ b/Godeps/_workspace/src/github.com/opencontainers/specs/runtime_config_linux.go @@ -1,23 +1,20 @@ -// +build linux - package specs import "os" -// LinuxSpec is the full specification for Linux containers -type LinuxSpec struct { - Spec - // Linux is platform specific configuration for Linux based containers - Linux Linux `json:"linux"` +// LinuxRuntimeSpec is the full specification for linux containers. +type LinuxRuntimeSpec struct { + RuntimeSpec + // LinuxRuntime is platform specific configuration for linux based containers. + Linux LinuxRuntime `json:"linux"` } -// Linux contains platform specific configuration for Linux based containers -type Linux struct { - // UIDMapping specifies user mappings for supporting user namespaces on Linux +type LinuxRuntime struct { + // UIDMapping specifies user mappings for supporting user namespaces on linux. UIDMappings []IDMapping `json:"uidMappings"` - // GIDMapping specifies group mappings for supporting user namespaces on Linux + // GIDMapping specifies group mappings for supporting user namespaces on linux. GIDMappings []IDMapping `json:"gidMappings"` - // Rlimits specifies rlimit options to apply to the container's process + // Rlimits specifies rlimit options to apply to the container's process. 
Rlimits []Rlimit `json:"rlimits"` // Sysctl are a set of key value pairs that are set for the container on start Sysctl map[string]string `json:"sysctl"` @@ -26,8 +23,6 @@ type Linux struct { Resources Resources `json:"resources"` // Namespaces contains the namespaces that are created and/or joined by the container Namespaces []Namespace `json:"namespaces"` - // Capabilities are Linux capabilities that are kept for the container - Capabilities []string `json:"capabilities"` // Devices are a list of device nodes that are created and enabled for the container Devices []Device `json:"devices"` // ApparmorProfile specified the apparmor profile for the container. @@ -40,18 +35,7 @@ type Linux struct { RootfsPropagation string `json:"rootfsPropagation"` } -// User specifies Linux specific user and group information for the container's -// main process -type User struct { - // Uid is the user id - UID int32 `json:"uid"` - // Gid is the group id - GID int32 `json:"gid"` - // AdditionalGids are additional group ids set for the container's process - AdditionalGids []int32 `json:"additionalGids"` -} - -// Namespace is the configuration for a Linux namespace +// Namespace is the configuration for a linux namespace. type Namespace struct { // Type is the type of Linux namespace Type string `json:"type"` @@ -167,10 +151,10 @@ type Resources struct { } type Device struct { - // Device type, block, char, etc. - Type rune `json:"type"` // Path to the device. Path string `json:"path"` + // Device type, block, char, etc. + Type rune `json:"type"` // Major is the device's major number. Major int64 `json:"major"` // Minor is the device's minor number. diff --git a/README.md b/README.md index fce024678d5..c8a31308288 100644 --- a/README.md +++ b/README.md @@ -4,16 +4,17 @@ ## State of the project -Currently `runc` is an implementation of the OCF specification. We are currently sprinting +Currently `runc` is an implementation of the OCI specification. 
We are currently sprinting to have a v1 of the spec out within a quick timeframe of a few weeks, ~July 2015, so the `runc` config format will be constantly changing until the spec is finalized. However, we encourage you to try out the tool and give feedback. ### OCF -How does `runc` integrate with the Open Container Format? `runc` depends on the types -specified in the [specs](https://github.com/opencontainers/specs) repository. Whenever -the specification is updated and ready to be versioned `runc` will update it's dependency +How does `runc` integrate with the Open Container Initiative Specification? +`runc` depends on the types specified in the +[specs](https://github.com/opencontainers/specs) repository. Whenever the +specification is updated and ready to be versioned `runc` will update its dependency on the specs repository and support the update spec. ### Building: @@ -41,229 +42,300 @@ PID USER COMMAND / $ ``` -Or you can specify the path to a JSON configuration file: -```bash -runc start config.json -/ $ ps -PID USER COMMAND -1 daemon sh -5 daemon sh -/ $ -``` -Note: the use of the `start` command is required when specifying a -configuration file. +### OCI Container JSON Format: -### OCF Container JSON Format: - -Below is a sample `config.json` configuration file. It assumes that +Below are sample `config.json` and `runtime.json` configuration files. They assume that the file-system is found in a directory called `rootfs` and there is a user with uid and gid of `0` defined within that file-system.
+`config.json`: +```json +{ + "version": "pre-draft", + "platform": { + "os": "linux", + "arch": "amd64" + }, + "process": { + "terminal": true, + "user": { + "uid": 0, + "gid": 0, + "additionalGids": null + }, + "args": [ + "sh" + ], + "env": [ + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + "TERM=xterm" + ], + "cwd": "" + }, + "root": { + "path": "rootfs", + "readonly": true + }, + "hostname": "shell", + "mounts": [ + { + "name": "proc", + "path": "/proc" + }, + { + "name": "dev", + "path": "/dev" + }, + { + "name": "devpts", + "path": "/dev/pts" + }, + { + "name": "shm", + "path": "/dev/shm" + }, + { + "name": "mqueue", + "path": "/dev/mqueue" + }, + { + "name": "sysfs", + "path": "/sys" + }, + { + "name": "cgroup", + "path": "/sys/fs/cgroup" + } + ], + "linux": { + "capabilities": [ + "AUDIT_WRITE", + "KILL", + "NET_BIND_SERVICE" + ], + "rootfsPropagation": "" + } +} +``` + +`runtime.json`: ```json { - "version": "pre-draft", - "platform": { - "os": "linux", - "arch": "amd64" - }, - "process": { - "terminal": true, - "user": { - "uid": 0, - "gid": 0, - "additionalGids": null - }, - "args": [ - "sh" - ], - "env": [ - "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", - "TERM=xterm" - ], - "cwd": "" - }, - "root": { - "path": "rootfs", - "readonly": true - }, - "hostname": "shell", - "mounts": [ - { - "type": "proc", - "source": "proc", - "destination": "/proc", - "options": "" - }, - { - "type": "tmpfs", - "source": "tmpfs", - "destination": "/dev", - "options": "nosuid,strictatime,mode=755,size=65536k" - }, - { - "type": "devpts", - "source": "devpts", - "destination": "/dev/pts", - "options": "nosuid,noexec,newinstance,ptmxmode=0666,mode=0620,gid=5" - }, - { - "type": "tmpfs", - "source": "shm", - "destination": "/dev/shm", - "options": "nosuid,noexec,nodev,mode=1777,size=65536k" - }, - { - "type": "mqueue", - "source": "mqueue", - "destination": "/dev/mqueue", - "options": "nosuid,noexec,nodev" - }, - { - "type": 
"sysfs", - "source": "sysfs", - "destination": "/sys", - "options": "nosuid,noexec,nodev" - }, - { - "type": "cgroup", - "source": "cgroup", - "destination": "/sys/fs/cgroup", - "options": "nosuid,noexec,nodev,relatime,ro" - } - ], - "linux": { - "uidMapping": null, - "gidMapping": null, - "rlimits": [ - { - "type": 7, - "hard": 1024, - "soft": 1024 - } - ], - "systemProperties": null, - "resources": { - "disableOOMKiller": false, - "memory": { - "limit": 0, - "reservation": 0, - "swap": 0, - "kernel": 0, - "swappiness": -1 - }, - "cpu": { - "shares": 0, - "quota": 0, - "period": 0, - "realtimeRuntime": 0, - "realtimePeriod": 0, - "cpus": "", - "mems": "" - }, - "blockIO": { - "blkioWeight": 0, - "blkioWeightDevice": "", - "blkioThrottleReadBpsDevice": "", - "blkioThrottleWriteBpsDevice": "", - "blkioThrottleReadIopsDevice": "", - "blkioThrottleWriteIopsDevice": "" - }, - "hugepageLimits": null, - "network": { - "classId": "", - "priorities": null - } - }, - "namespaces": [ - { - "type": "pid", - "path": "" - }, - { - "type": "network", - "path": "" - }, - { - "type": "ipc", - "path": "" - }, - { - "type": "uts", - "path": "" - }, - { - "type": "mount", - "path": "" - } - ], - "capabilities": [ - "AUDIT_WRITE", - "KILL", - "NET_BIND_SERVICE" - ], - "devices": [ - { - "type": 99, - "path": "/dev/null", - "major": 1, - "minor": 3, - "permissions": "rwm", - "fileMode": 438, - "uid": 0, - "gid": 0 - }, - { - "type": 99, - "path": "/dev/random", - "major": 1, - "minor": 8, - "permissions": "rwm", - "fileMode": 438, - "uid": 0, - "gid": 0 - }, - { - "type": 99, - "path": "/dev/full", - "major": 1, - "minor": 7, - "permissions": "rwm", - "fileMode": 438, - "uid": 0, - "gid": 0 - }, - { - "type": 99, - "path": "/dev/tty", - "major": 5, - "minor": 0, - "permissions": "rwm", - "fileMode": 438, - "uid": 0, - "gid": 0 - }, - { - "type": 99, - "path": "/dev/zero", - "major": 1, - "minor": 5, - "permissions": "rwm", - "fileMode": 438, - "uid": 0, - "gid": 0 - }, - { - "type": 
99, - "path": "/dev/urandom", - "major": 1, - "minor": 9, - "permissions": "rwm", - "fileMode": 438, - "uid": 0, - "gid": 0 - } - ], - } + "mounts": [ + { + "type": "proc", + "source": "proc", + "destination": "/proc", + "options": null + }, + { + "type": "tmpfs", + "source": "tmpfs", + "destination": "/dev", + "options": [ + "nosuid", + "strictatime", + "mode=755", + "size=65536k" + ] + }, + { + "type": "devpts", + "source": "devpts", + "destination": "/dev/pts", + "options": [ + "nosuid", + "noexec", + "newinstance", + "ptmxmode=0666", + "mode=0620", + "gid=5" + ] + }, + { + "type": "tmpfs", + "source": "shm", + "destination": "/dev/shm", + "options": [ + "nosuid", + "noexec", + "nodev", + "mode=1777", + "size=65536k" + ] + }, + { + "type": "mqueue", + "source": "mqueue", + "destination": "/dev/mqueue", + "options": [ + "nosuid", + "noexec", + "nodev" + ] + }, + { + "type": "sysfs", + "source": "sysfs", + "destination": "/sys", + "options": [ + "nosuid", + "noexec", + "nodev" + ] + }, + { + "type": "cgroup", + "source": "cgroup", + "destination": "/sys/fs/cgroup", + "options": [ + "nosuid", + "noexec", + "nodev", + "relatime", + "ro" + ] + } + ], + "hooks": { + "prestart": null, + "poststop": null + }, + "linux": { + "uidMappings": null, + "gidMappings": null, + "rlimits": [ + { + "type": 7, + "hard": 1024, + "soft": 1024 + } + ], + "sysctl": null, + "resources": { + "disableOOMKiller": false, + "memory": { + "limit": 0, + "reservation": 0, + "swap": 0, + "kernel": 0, + "swappiness": -1 + }, + "cpu": { + "shares": 0, + "quota": 0, + "period": 0, + "realtimeRuntime": 0, + "realtimePeriod": 0, + "cpus": "", + "mems": "" + }, + "blockIO": { + "blkioWeight": 0, + "blkioWeightDevice": "", + "blkioThrottleReadBpsDevice": "", + "blkioThrottleWriteBpsDevice": "", + "blkioThrottleReadIopsDevice": "", + "blkioThrottleWriteIopsDevice": "" + }, + "hugepageLimits": null, + "network": { + "classId": "", + "priorities": null + } + }, + "namespaces": [ + { + "type": "pid", + 
"path": "" + }, + { + "type": "network", + "path": "" + }, + { + "type": "ipc", + "path": "" + }, + { + "type": "uts", + "path": "" + }, + { + "type": "mount", + "path": "" + } + ], + "devices": [ + { + "path": "/dev/null", + "type": 99, + "major": 1, + "minor": 3, + "permissions": "rwm", + "fileMode": 438, + "uid": 0, + "gid": 0 + }, + { + "path": "/dev/random", + "type": 99, + "major": 1, + "minor": 8, + "permissions": "rwm", + "fileMode": 438, + "uid": 0, + "gid": 0 + }, + { + "path": "/dev/full", + "type": 99, + "major": 1, + "minor": 7, + "permissions": "rwm", + "fileMode": 438, + "uid": 0, + "gid": 0 + }, + { + "path": "/dev/tty", + "type": 99, + "major": 5, + "minor": 0, + "permissions": "rwm", + "fileMode": 438, + "uid": 0, + "gid": 0 + }, + { + "path": "/dev/zero", + "type": 99, + "major": 1, + "minor": 5, + "permissions": "rwm", + "fileMode": 438, + "uid": 0, + "gid": 0 + }, + { + "path": "/dev/urandom", + "type": 99, + "major": 1, + "minor": 9, + "permissions": "rwm", + "fileMode": 438, + "uid": 0, + "gid": 0 + } + ], + "apparmorProfile": "", + "selinuxProcessLabel": "", + "seccomp": { + "defaultAction": "SCMP_ACT_ALLOW", + "syscalls": [] + }, + "rootfsPropagation": "" + } } ``` @@ -280,8 +352,8 @@ To test using Docker's `busybox` image follow these steps: mkdir rootfs tar -C rootfs -xf busybox.tar ``` -* Create a file called `config.json` using the example from above. You can also -generate a spec using `runc spec`, redirecting the output into `config.json` +* Create `config.json` and `runtime.json` using the example from above. You can also +generate a spec using `runc spec`, which will create those files for you. 
* Execute `runc start` and you should be placed into a shell where you can run `ps`: ``` $ runc start diff --git a/restore.go b/restore.go index 8f4a71a6401..cf7be2d3e71 100644 --- a/restore.go +++ b/restore.go @@ -29,11 +29,11 @@ var restoreCommand = cli.Command{ if imagePath == "" { imagePath = getDefaultImagePath(context) } - spec, err := loadSpec(context.Args().First()) + spec, rspec, err := loadSpec() if err != nil { fatal(err) } - config, err := createLibcontainerConfig(context.GlobalString("id"), spec) + config, err := createLibcontainerConfig(context.GlobalString("id"), spec, rspec) if err != nil { fatal(err) } diff --git a/spec.go b/spec.go index 52f171bc8fa..11779566765 100644 --- a/spec.go +++ b/spec.go @@ -5,6 +5,7 @@ package main import ( "encoding/json" "fmt" + "io/ioutil" "os" "path/filepath" "runtime" @@ -22,6 +23,10 @@ import ( var specCommand = cli.Command{ Name: "spec", Usage: "create a new specification file", + Flags: []cli.Flag{ + cli.StringFlag{Name: "config-output, c", Value: "config.json", Usage: "path to spec file for writing"}, + cli.StringFlag{Name: "runtime-output, r", Value: "runtime.json", Usage: "path for runtime file for writing"}, + }, Action: func(context *cli.Context) { spec := specs.LinuxSpec{ Spec: specs.Spec{ @@ -46,52 +51,93 @@ var specCommand = cli.Command{ }, }, Hostname: "shell", + MountPoints: []specs.MountPoint{ + { + Name: "proc", + Path: "/proc", + }, + { + Name: "dev", + Path: "/dev", + }, + { + Name: "devpts", + Path: "/dev/pts", + }, + { + Name: "shm", + Path: "/dev/shm", + }, + { + Name: "mqueue", + Path: "/dev/mqueue", + }, + { + Name: "sysfs", + Path: "/sys", + }, + { + Name: "cgroup", + Path: "/sys/fs/cgroup", + }, + }, + }, + Linux: specs.Linux{ + Capabilities: []string{ + "AUDIT_WRITE", + "KILL", + "NET_BIND_SERVICE", + }, + }, + } + rspec := specs.LinuxRuntimeSpec{ + RuntimeSpec: specs.RuntimeSpec{ Mounts: []specs.Mount{ { Type: "proc", Source: "proc", Destination: "/proc", - Options: "", + Options: nil, }, { 
Type: "tmpfs", Source: "tmpfs", Destination: "/dev", - Options: "nosuid,strictatime,mode=755,size=65536k", + Options: []string{"nosuid", "strictatime", "mode=755", "size=65536k"}, }, { Type: "devpts", Source: "devpts", Destination: "/dev/pts", - Options: "nosuid,noexec,newinstance,ptmxmode=0666,mode=0620,gid=5", + Options: []string{"nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620", "gid=5"}, }, { Type: "tmpfs", Source: "shm", Destination: "/dev/shm", - Options: "nosuid,noexec,nodev,mode=1777,size=65536k", + Options: []string{"nosuid", "noexec", "nodev", "mode=1777", "size=65536k"}, }, { Type: "mqueue", Source: "mqueue", Destination: "/dev/mqueue", - Options: "nosuid,noexec,nodev", + Options: []string{"nosuid", "noexec", "nodev"}, }, { Type: "sysfs", Source: "sysfs", Destination: "/sys", - Options: "nosuid,noexec,nodev", + Options: []string{"nosuid", "noexec", "nodev"}, }, { Type: "cgroup", Source: "cgroup", Destination: "/sys/fs/cgroup", - Options: "nosuid,noexec,nodev,relatime,ro", + Options: []string{"nosuid", "noexec", "nodev", "relatime", "ro"}, }, }, }, - Linux: specs.Linux{ + Linux: specs.LinuxRuntime{ Namespaces: []specs.Namespace{ { Type: "pid", @@ -109,11 +155,6 @@ var specCommand = cli.Command{ Type: "mount", }, }, - Capabilities: []string{ - "AUDIT_WRITE", - "KILL", - "NET_BIND_SERVICE", - }, Rlimits: []specs.Rlimit{ { Type: syscall.RLIMIT_NOFILE, @@ -121,7 +162,6 @@ var specCommand = cli.Command{ Soft: uint64(1024), }, }, - Devices: []specs.Device{ { Type: 'c', @@ -199,7 +239,16 @@ var specCommand = cli.Command{ if err != nil { logrus.Fatal(err) } - fmt.Printf("%s", data) + if err := ioutil.WriteFile(context.String("config-output"), data, 0666); err != nil { + logrus.Fatal(err) + } + rdata, err := json.MarshalIndent(&rspec, "", "\t") + if err != nil { + logrus.Fatal(err) + } + if err := ioutil.WriteFile(context.String("runtime-output"), rdata, 0666); err != nil { + logrus.Fatal(err) + } }, } @@ -214,23 +263,34 @@ var namespaceMapping = 
map[string]configs.NamespaceType{ // loadSpec loads the specification from the provided path. // If the path is empty then the default path will be "config.json" -func loadSpec(path string) (*specs.LinuxSpec, error) { - if path == "" { - path = "config.json" - } - f, err := os.Open(path) +func loadSpec() (spec *specs.LinuxSpec, rspec *specs.LinuxRuntimeSpec, err error) { + cPath := "config.json" + rPath := "runtime.json" + cf, err := os.Open(cPath) if err != nil { if os.IsNotExist(err) { - return nil, fmt.Errorf("JSON specification file for %s not found", path) + return nil, nil, fmt.Errorf("JSON specification file for %s not found", cPath) } - return nil, err + return } - defer f.Close() - var s *specs.LinuxSpec - if err := json.NewDecoder(f).Decode(&s); err != nil { - return nil, err + runtime := true + rf, err := os.Open(rPath) + if err != nil { + if !os.IsNotExist(err) { + return + } + runtime = false + } + defer rf.Close() + if err = json.NewDecoder(cf).Decode(&spec); err != nil { + return + } + if runtime { + if err = json.NewDecoder(rf).Decode(&rspec); err != nil { + return + } } - return s, checkSpecVersion(s) + return spec, rspec, checkSpecVersion(spec) } // checkSpecVersion makes sure that the spec version matches runc's while we are in the initial @@ -242,7 +302,7 @@ func checkSpecVersion(s *specs.LinuxSpec) error { return nil } -func createLibcontainerConfig(cgroupName string, spec *specs.LinuxSpec) (*configs.Config, error) { +func createLibcontainerConfig(cgroupName string, spec *specs.LinuxSpec, rspec *specs.LinuxRuntimeSpec) (*configs.Config, error) { cwd, err := os.Getwd() if err != nil { return nil, err @@ -258,7 +318,7 @@ func createLibcontainerConfig(cgroupName string, spec *specs.LinuxSpec) (*config Hostname: spec.Hostname, Privatefs: true, } - for _, ns := range spec.Linux.Namespaces { + for _, ns := range rspec.Linux.Namespaces { t, exists := namespaceMapping[ns.Type] if !exists { return nil, fmt.Errorf("namespace %q does not exist", ns) @@ 
-272,19 +332,26 @@ func createLibcontainerConfig(cgroupName string, spec *specs.LinuxSpec) (*config }, } } - for _, m := range spec.Mounts { - config.Mounts = append(config.Mounts, createLibcontainerMount(cwd, m)) +loop: + for _, mp := range spec.MountPoints { + for _, m := range rspec.Mounts { + if mp.Path == m.Destination { + config.Mounts = append(config.Mounts, createLibcontainerMount(cwd, m)) + continue loop + } + } + return nil, fmt.Errorf("Mount with Path %v not found in runtime config", mp.Path) } - if err := createDevices(spec, config); err != nil { + if err := createDevices(rspec, config); err != nil { return nil, err } - if err := setupUserNamespace(spec, config); err != nil { + if err := setupUserNamespace(rspec, config); err != nil { return nil, err } - for _, rlimit := range spec.Linux.Rlimits { + for _, rlimit := range rspec.Linux.Rlimits { config.Rlimits = append(config.Rlimits, createLibContainerRlimit(rlimit)) } - c, err := createCgroupConfig(cgroupName, spec, config.Devices) + c, err := createCgroupConfig(cgroupName, rspec, config.Devices) if err != nil { return nil, err } @@ -298,14 +365,14 @@ func createLibcontainerConfig(cgroupName string, spec *specs.LinuxSpec) (*config "/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus", } } - seccomp, err := setupSeccomp(&spec.Linux.Seccomp) + seccomp, err := setupSeccomp(&rspec.Linux.Seccomp) if err != nil { return nil, err } config.Seccomp = seccomp - config.Sysctl = spec.Linux.Sysctl - config.ProcessLabel = spec.Linux.SelinuxProcessLabel - config.AppArmorProfile = spec.Linux.ApparmorProfile + config.Sysctl = rspec.Linux.Sysctl + config.ProcessLabel = rspec.Linux.SelinuxProcessLabel + config.AppArmorProfile = rspec.Linux.ApparmorProfile return config, nil } @@ -326,7 +393,7 @@ func createLibcontainerMount(cwd string, m specs.Mount) *configs.Mount { } } -func createCgroupConfig(name string, spec *specs.LinuxSpec, devices []*configs.Device) (*configs.Cgroup, error) { +func createCgroupConfig(name 
string, spec *specs.LinuxRuntimeSpec, devices []*configs.Device) (*configs.Cgroup, error) { myCgroupPath, err := cgroups.GetThisCgroupDir("devices") if err != nil { return nil, err @@ -372,7 +439,7 @@ func createCgroupConfig(name string, spec *specs.LinuxSpec, devices []*configs.D return c, nil } -func createDevices(spec *specs.LinuxSpec, config *configs.Config) error { +func createDevices(spec *specs.LinuxRuntimeSpec, config *configs.Config) error { for _, d := range spec.Linux.Devices { device := &configs.Device{ Type: d.Type, @@ -397,7 +464,7 @@ func setReadonly(config *configs.Config) { } } -func setupUserNamespace(spec *specs.LinuxSpec, config *configs.Config) error { +func setupUserNamespace(spec *specs.LinuxRuntimeSpec, config *configs.Config) error { if len(spec.Linux.UIDMappings) == 0 { return nil } @@ -440,7 +507,7 @@ func createLibContainerRlimit(rlimit specs.Rlimit) configs.Rlimit { // parseMountOptions parses the string and returns the flags and any mount data that // it contains. 
-func parseMountOptions(options string) (int, string) { +func parseMountOptions(options []string) (int, string) { var ( flag int data []string @@ -483,7 +550,7 @@ func parseMountOptions(options string) (int, string) { "sync": {false, syscall.MS_SYNCHRONOUS}, "unbindable": {false, syscall.MS_UNBINDABLE}, } - for _, o := range strings.Split(options, ",") { + for _, o := range options { // If the option does not exist in the flags table or the flag // is not supported on the platform, // then it is a data value for a specific fs type diff --git a/start.go b/start.go index 465e97d02cb..ee51ffb6dd5 100644 --- a/start.go +++ b/start.go @@ -21,14 +21,14 @@ var startCommand = cli.Command{ Name: "start", Usage: "create and run a container", Action: func(context *cli.Context) { - spec, err := loadSpec(context.Args().First()) + spec, rspec, err := loadSpec() if err != nil { fatal(err) } notifySocket := os.Getenv("NOTIFY_SOCKET") if notifySocket != "" { - setupSdNotify(spec, notifySocket) + setupSdNotify(spec, rspec, notifySocket) } listenFds := os.Getenv("LISTEN_FDS") @@ -41,7 +41,7 @@ var startCommand = cli.Command{ if os.Geteuid() != 0 { logrus.Fatal("runc should be run as root") } - status, err := startContainer(context, spec) + status, err := startContainer(context, spec, rspec) if err != nil { logrus.Fatalf("Container start failed: %v", err) } @@ -63,8 +63,8 @@ func init() { } } -func startContainer(context *cli.Context, spec *specs.LinuxSpec) (int, error) { - config, err := createLibcontainerConfig(context.GlobalString("id"), spec) +func startContainer(context *cli.Context, spec *specs.LinuxSpec, rspec *specs.LinuxRuntimeSpec) (int, error) { + config, err := createLibcontainerConfig(context.GlobalString("id"), spec, rspec) if err != nil { return -1, err } @@ -117,9 +117,10 @@ func startContainer(context *cli.Context, spec *specs.LinuxSpec) (int, error) { // If systemd is supporting sd_notify protocol, this function will add support // for sd_notify protocol from within 
the container. -func setupSdNotify(spec *specs.LinuxSpec, notifySocket string) { - spec.Mounts = append(spec.Mounts, specs.Mount{Type: "bind", Source: notifySocket, Destination: notifySocket, Options: "bind"}) +func setupSdNotify(spec *specs.LinuxSpec, rspec *specs.LinuxRuntimeSpec, notifySocket string) { + spec.MountPoints = append(spec.MountPoints, specs.MountPoint{Name: "sdNotify", Path: notifySocket}) spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("NOTIFY_SOCKET=%s", notifySocket)) + rspec.Mounts = append(rspec.Mounts, specs.Mount{Type: "bind", Source: notifySocket, Destination: notifySocket, Options: []string{"bind"}}) } // If systemd is supporting on-demand socket activation, this function will add support