Skip to content

Commit

Permalink
allow running dockerd in an unprivileged user namespace (rootless m…
Browse files Browse the repository at this point in the history
…ode)

Please refer to `docs/rootless.md`.

TLDR:
 * Make sure `/etc/subuid` and `/etc/subgid` contain the entry for you
 * `dockerd-rootless.sh --experimental`
 * `docker -H unix://$XDG_RUNTIME_DIR/docker.sock run ...`

Signed-off-by: Akihiro Suda <suda.akihiro@lab.ntt.co.jp>
  • Loading branch information
AkihiroSuda committed Oct 15, 2018
1 parent a268955 commit d210e20
Show file tree
Hide file tree
Showing 32 changed files with 2,235 additions and 11 deletions.
14 changes: 14 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,18 @@ ENV INSTALL_BINARY_NAME=tini
COPY hack/dockerfile/install/$INSTALL_BINARY_NAME.installer ./
RUN PREFIX=/build/ ./install.sh $INSTALL_BINARY_NAME

# Build stage for rootlesskit: runs the rootlesskit installer through install.sh
# with PREFIX=/build/. The result is consumed by the
# `COPY --from=rootlesskit /build/ /usr/local/bin/` instruction further down.
FROM base AS rootlesskit
# INSTALL_BINARY_NAME selects both the *.installer file copied in and the
# argument passed to install.sh.
ENV INSTALL_BINARY_NAME=rootlesskit
COPY hack/dockerfile/install/install.sh ./install.sh
COPY hack/dockerfile/install/$INSTALL_BINARY_NAME.installer ./
RUN PREFIX=/build/ ./install.sh $INSTALL_BINARY_NAME

# Build stage for slirp4netns: runs the slirp4netns installer through install.sh
# with PREFIX=/build/. The result is consumed by the
# `COPY --from=slirp4netns /build/ /usr/local/bin/` instruction further down.
FROM base AS slirp4netns
# Autotools packages installed before the installer runs; presumably they are
# build prerequisites of slirp4netns (TODO confirm against slirp4netns.installer).
RUN apt-get update && apt-get install -y automake autotools-dev libtool
# INSTALL_BINARY_NAME selects both the *.installer file copied in and the
# argument passed to install.sh.
ENV INSTALL_BINARY_NAME=slirp4netns
COPY hack/dockerfile/install/install.sh ./install.sh
COPY hack/dockerfile/install/$INSTALL_BINARY_NAME.installer ./
RUN PREFIX=/build/ ./install.sh $INSTALL_BINARY_NAME

# TODO: Some of this is only really needed for testing, it would be nice to split this up
FROM runtime-dev AS dev
Expand Down Expand Up @@ -226,6 +237,9 @@ RUN cd /docker-py \
&& pip install docker-pycreds==0.2.1 \
&& pip install yamllint==1.5.0 \
&& pip install -r test-requirements.txt
COPY --from=rootlesskit /build/ /usr/local/bin/
COPY --from=slirp4netns /build/ /usr/local/bin/
COPY ./contrib/dockerd-rootless.sh /usr/local/bin

ENV PATH=/usr/local/cli:$PATH
ENV DOCKER_BUILDTAGS apparmor seccomp selinux
Expand Down
5 changes: 5 additions & 0 deletions cmd/dockerd/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"github.com/docker/docker/daemon/config"
"github.com/docker/docker/opts"
"github.com/docker/docker/registry"
"github.com/docker/docker/rootless"
"github.com/spf13/pflag"
)

Expand Down Expand Up @@ -80,6 +81,10 @@ func installCommonConfigFlags(conf *config.Config, flags *pflag.FlagSet) {

conf.MaxConcurrentDownloads = &maxConcurrentDownloads
conf.MaxConcurrentUploads = &maxConcurrentUploads

// Mostly users don't need to set this flag explicitly.
// However, when running Docker as the mapped root within a rootless Docker, users might need to set this flag explicitly.
flags.BoolVar(&conf.Rootless, "rootless", rootless.RunningWithNonRootUsername, "Enable rootless mode (experimental)")
}

func installRegistryServiceFlags(options *registry.ServiceOptions, flags *pflag.FlagSet) {
Expand Down
22 changes: 22 additions & 0 deletions cmd/dockerd/config_common_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,14 @@
package main

import (
"os"
"path/filepath"
"strings"

"github.com/docker/docker/api/types"
"github.com/docker/docker/daemon/config"
"github.com/docker/docker/opts"
"github.com/docker/docker/rootless"
"github.com/spf13/pflag"
)

Expand All @@ -15,6 +20,23 @@ var (
defaultExecRoot = "/var/run/docker"
)

// init redirects the default data root, pid file and exec root to per-user
// locations when rootless.RunningWithNonRootUsername is set. Environment
// variables that are unset or empty leave the corresponding default untouched.
func init() {
	if !rootless.RunningWithNonRootUsername {
		return
	}

	// pam_systemd sets XDG_RUNTIME_DIR but not other dirs, hence the HOME
	// fallback for the data root.
	dataHome, homeDir := os.Getenv("XDG_DATA_HOME"), os.Getenv("HOME")
	switch {
	case dataHome != "":
		// Only the first ":"-separated entry is used.
		defaultDataRoot = filepath.Join(strings.Split(dataHome, ":")[0], "docker")
	case homeDir != "":
		defaultDataRoot = filepath.Join(homeDir, ".local", "share", "docker")
	}

	runtimeDir := os.Getenv("XDG_RUNTIME_DIR")
	if runtimeDir == "" {
		return
	}
	// Only the first ":"-separated entry is used.
	base := strings.Split(runtimeDir, ":")[0]
	defaultPidFile = filepath.Join(base, "docker.pid")
	defaultExecRoot = filepath.Join(base, "docker")
}

// installUnixConfigFlags adds command-line options to the top-level flag parser for
// the current process that are common across Unix platforms.
func installUnixConfigFlags(conf *config.Config, flags *pflag.FlagSet) {
Expand Down
14 changes: 13 additions & 1 deletion cmd/dockerd/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ import (
"github.com/docker/docker/pkg/signal"
"github.com/docker/docker/pkg/system"
"github.com/docker/docker/plugin"
"github.com/docker/docker/rootless"
"github.com/docker/docker/runconfig"
"github.com/docker/go-connections/tlsconfig"
swarmapi "github.com/docker/swarmkit/api"
Expand Down Expand Up @@ -93,6 +94,17 @@ func (cli *DaemonCli) start(opts *daemonOptions) (err error) {
if cli.Config.Experimental {
logrus.Warn("Running experimental build")
}
// return human-friendly error before creating files
if runtime.GOOS == "linux" && os.Geteuid() != 0 {
return fmt.Errorf("rootless mode needs dockerd to be executed in userns")
}
if cli.Config.Rootless {
logrus.Warn("Running in rootless mode (experimental). Cgroups, AppArmor, and CRIU are not likely to work.")
if !cli.Config.Experimental {
return fmt.Errorf("rootless mode is only supported when experimental is enabled")
}
// TODO: make sure mountns and netns are unshared.
}

logrus.SetFormatter(&logrus.TextFormatter{
TimestampFormat: jsonmessage.RFC3339NanoFixed,
Expand Down Expand Up @@ -589,7 +601,7 @@ func loadListeners(cli *DaemonCli, serverConfig *apiserver.Config) ([]string, er
var hosts []string
for i := 0; i < len(cli.Config.Hosts); i++ {
var err error
if cli.Config.Hosts[i], err = dopts.ParseHost(cli.Config.TLS, cli.Config.Hosts[i]); err != nil {
if cli.Config.Hosts[i], err = dopts.ParseHost(cli.Config.TLS, cli.Config.Hosts[i], rootless.RunningWithNonRootUsername); err != nil {
return nil, errors.Wrapf(err, "error parsing -H %s", cli.Config.Hosts[i])
}

Expand Down
24 changes: 22 additions & 2 deletions cmd/dockerd/daemon_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,37 @@ import (
"os/signal"
"path/filepath"
"strconv"
"strings"

"github.com/containerd/containerd/runtime/v1/linux"
"github.com/docker/docker/cmd/dockerd/hack"
"github.com/docker/docker/daemon"
"github.com/docker/docker/daemon/config"
"github.com/docker/docker/libcontainerd/supervisor"
"github.com/docker/docker/rootless"
"github.com/docker/libnetwork/portallocator"
"golang.org/x/sys/unix"
)

// Default locations of the daemon configuration directory and file.
// They are variables rather than constants because init() reassigns both
// when the daemon runs with a non-root username (rootless mode).
var (
	defaultDaemonConfigDir  = "/etc/docker"
	defaultDaemonConfigFile = "/etc/docker/daemon.json"
)

// init points the daemon configuration directory and file at a per-user
// location when rootless.RunningWithNonRootUsername is set.
func init() {
	if !rootless.RunningWithNonRootUsername {
		return
	}

	// NOTE: the CLI uses ~/.docker while the daemon uses ~/.config/docker,
	// because ~/.docker was not designed to store daemon configurations.
	// In the future, the daemon directory may be renamed to
	// ~/.config/moby-engine (?).
	configHome, homeDir := os.Getenv("XDG_CONFIG_HOME"), os.Getenv("HOME")
	switch {
	case configHome != "":
		// Only the first ":"-separated entry is used.
		defaultDaemonConfigDir = filepath.Join(strings.Split(configHome, ":")[0], "docker")
	case homeDir != "":
		defaultDaemonConfigDir = filepath.Join(homeDir, ".config", "docker")
	}

	// The file always follows whichever directory is in effect, including the
	// unchanged default when neither variable is set.
	defaultDaemonConfigFile = filepath.Join(defaultDaemonConfigDir, "daemon.json")
}

// setDefaultUmask sets the umask to 0022 to avoid problems
// caused by custom umask
Expand All @@ -34,7 +54,7 @@ func setDefaultUmask() error {
}

// getDaemonConfDir returns the directory that holds the daemon configuration.
// The string argument is ignored. The result is defaultDaemonConfigDir rather
// than a hard-coded "/etc/docker", so the per-user directory chosen by init()
// in rootless mode is honoured.
func getDaemonConfDir(_ string) string {
	return defaultDaemonConfigDir
}

func (cli *DaemonCli) getPlatformContainerdDaemonOpts() ([]supervisor.DaemonOpt, error) {
Expand Down
36 changes: 36 additions & 0 deletions contrib/dockerd-rootless.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/bin/sh
# dockerd-rootless.sh: start dockerd through RootlessKit.
#
# The script runs twice. On the first pass (_DOCKERD_ROOTLESS_CHILD unset) it
# re-executes itself under rootlesskit; on the second pass (inside the
# namespaces created by rootlesskit) it starts dockerd. All command-line
# arguments are forwarded unchanged to dockerd.
#
# Requires XDG_RUNTIME_DIR and HOME to be set.
set -e -x
# Expansions are quoted so that values containing whitespace or glob
# characters are tested and forwarded as single words.
if [ -z "$XDG_RUNTIME_DIR" ]; then
	echo "XDG_RUNTIME_DIR needs to be set"
	exit 1
fi
if [ -z "$HOME" ]; then
	echo "HOME needs to be set"
	exit 1
fi

if [ -z "$_DOCKERD_ROOTLESS_CHILD" ]; then
	_DOCKERD_ROOTLESS_CHILD=1
	export _DOCKERD_ROOTLESS_CHILD
	# Re-exec the script via RootlessKit, so as to create unprivileged {user,mount,network} namespaces.
	#
	# --net specifies the network stack. slirp4netns, vpnkit, and vdeplug_slirp are supported.
	# Currently, slirp4netns is the fastest.
	# See https://github.com/rootless-containers/rootlesskit for the benchmark result.
	#
	# --copy-up allows removing/creating files in the directories by creating tmpfs and symlinks
	# * /etc: copy-up is required so as to prevent `/etc/resolv.conf` in the
	#         namespace from being unexpectedly unmounted when `/etc/resolv.conf` is recreated on the host
	#         (by either systemd-networkd or NetworkManager)
	# * /run: copy-up is required so that we can create /run/docker (hardcoded for plugins) in our namespace
	#
	# "$0" and "$@" are quoted so the script path and every original argument
	# reach the child as exactly one word each.
	rootlesskit \
		--net=slirp4netns --mtu=65520 \
		--copy-up=/etc --copy-up=/run \
		"$0" "$@"
else
	# Sanity check: under `set -e` the script aborts here if the marker has
	# any value other than 1.
	[ "$_DOCKERD_ROOTLESS_CHILD" = 1 ]
	# remove the symlinks for the existing files in the parent namespace if any,
	# so that we can create our own files in our mount namespace.
	rm -f /run/docker /run/xtables.lock
	dockerd "$@"
fi
1 change: 1 addition & 0 deletions daemon/config/config_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ type Config struct {
IpcMode string `json:"default-ipc-mode,omitempty"`
// ResolvConf is the path to the configuration of the host resolver
ResolvConf string `json:"resolv-conf,omitempty"`
Rootless bool `json:"bool,omitempty"`
}

// BridgeConfig stores all the bridge driver specific
Expand Down
1 change: 1 addition & 0 deletions daemon/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,7 @@ func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.S
logrus.Warnf("Failed to configure golang's threads limit: %v", err)
}

// ensureDefaultAppArmorProfile does nothing if apparmor is disabled
if err := ensureDefaultAppArmorProfile(); err != nil {
logrus.Errorf(err.Error())
}
Expand Down
3 changes: 3 additions & 0 deletions daemon/info.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,9 @@ func (daemon *Daemon) fillSecurityOptions(v *types.Info, sysInfo *sysinfo.SysInf
if rootIDs := daemon.idMapping.RootPair(); rootIDs.UID != 0 || rootIDs.GID != 0 {
securityOptions = append(securityOptions, "name=userns")
}
if daemon.configStore.Rootless {
securityOptions = append(securityOptions, "name=rootless")
}
v.SecurityOptions = securityOptions
}

Expand Down
14 changes: 13 additions & 1 deletion daemon/oci_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@ import (
"github.com/docker/docker/oci"
"github.com/docker/docker/pkg/idtools"
"github.com/docker/docker/pkg/mount"
"github.com/docker/docker/rootless/specconv"
volumemounts "github.com/docker/docker/volume/mounts"
"github.com/opencontainers/runc/libcontainer/apparmor"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/devices"
rsystem "github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/user"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
Expand Down Expand Up @@ -88,7 +90,7 @@ func setDevices(s *specs.Spec, c *container.Container) error {
// Build lists of devices allowed and created within the container.
var devs []specs.LinuxDevice
devPermissions := s.Linux.Resources.Devices
if c.HostConfig.Privileged {
if c.HostConfig.Privileged && !rsystem.RunningInUserNS() {
hostDevices, err := devices.HostDevices()
if err != nil {
return err
Expand Down Expand Up @@ -846,6 +848,11 @@ func (daemon *Daemon) createSpec(c *container.Container) (retSpec *specs.Spec, e
s.Linux.ReadonlyPaths = c.HostConfig.ReadonlyPaths
}

if daemon.configStore.Rootless {
if err := specconv.ToRootless(&s); err != nil {
return nil, err
}
}
return &s, nil
}

Expand Down Expand Up @@ -874,3 +881,8 @@ func (daemon *Daemon) mergeUlimits(c *containertypes.HostConfig) {
}
c.Ulimits = ulimits
}

// toRootless is a stub: it leaves s untouched and always returns nil.
// NOTE(review): createSpec calls specconv.ToRootless instead, so this helper
// looks unused in the visible code; confirm before relying on it.
func toRootless(s *specs.Spec) error {
	return nil
}
76 changes: 76 additions & 0 deletions docs/rootless.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Rootless mode (Experimental)

The rootless mode allows running `dockerd` as an unprivileged user, using `user_namespaces(7)`, `mount_namespaces(7)`, `network_namespaces(7)`, and [slirp4netns](https://github.com/rootless-containers/slirp4netns).

No SUID binary is required except `newuidmap` and `newgidmap`.

## Requirements
* `newuidmap` and `newgidmap` need to be installed on the host. These commands are provided by the `uidmap` package on most distros.

* `/etc/subuid` and `/etc/subgid` should contain >= 65536 sub-IDs. e.g. `penguin:231072:65536`.

```console
$ id -u
1001
$ whoami
penguin
$ grep ^$(whoami): /etc/subuid
penguin:231072:65536
$ grep ^$(whoami): /etc/subgid
penguin:231072:65536
```

* Some distros such as Debian (excluding Ubuntu) and Arch Linux require `sudo sh -c "echo 1 > /proc/sys/kernel/unprivileged_userns_clone"`.
* Some distros require `sudo modprobe ip_tables iptable_mangle iptable_nat iptable_filter`.

## Restrictions

* Only `vfs` graphdriver is supported. However, on [Ubuntu](http://kernel.ubuntu.com/git/ubuntu/ubuntu-artful.git/commit/fs/overlayfs?h=Ubuntu-4.13.0-25.29&id=0a414bdc3d01f3b61ed86cfe3ce8b63a9240eba7) and a few distros, `overlay2` and `overlay` are also supported. [Starting with Linux 4.18](https://www.phoronix.com/scan.php?page=news_item&px=Linux-4.18-FUSE), we will also be able to implement FUSE snapshotters.
* Cgroups (including `docker top`) and AppArmor are disabled at the moment. (FIXME: we could enable Cgroups if configured on the host)
* Checkpoint is not supported at the moment.
* Running rootless `dockerd` in rootless/rootful `dockerd` is also possible, but not fully tested.

## Usage

### Daemon

You need to run `dockerd-rootless.sh` instead of `dockerd`.

```console
$ dockerd-rootless.sh --experimental
```
As Rootless mode is experimental per se, currently you always need to run `dockerd-rootless.sh` with `--experimental`.

Remarks:
* The socket path is set to `$XDG_RUNTIME_DIR/docker.sock` by default. `$XDG_RUNTIME_DIR` is typically set to `/run/user/$UID`.
* The data dir is set to `~/.local/share/docker` by default.
* The exec dir is set to `$XDG_RUNTIME_DIR/docker` by default.
* The config dir is set to `~/.config/docker` (not `~/.docker`) by default.
* The `dockerd-rootless.sh` script executes `dockerd` in its own user, mount, and network namespace. You can enter the namespaces by running `nsenter -U --preserve-credentials -n -m -t $(cat $XDG_RUNTIME_DIR/docker.pid)`.

### Client

You can just use the upstream Docker client but you need to set the socket path explicitly.

```console
$ docker -H unix://$XDG_RUNTIME_DIR/docker.sock run -d nginx
```

### Exposing ports

In addition to exposing container ports to the `dockerd` network namespace, you also need to expose the ports in the `dockerd` network namespace to the host network namespace.

```console
$ docker -H unix://$XDG_RUNTIME_DIR/docker.sock run -d -p 80:80 nginx
$ socat -t -- TCP-LISTEN:8080,reuseaddr,fork EXEC:"nsenter -U -n -t $(cat $XDG_RUNTIME_DIR/docker.pid) socat -t -- STDIN TCP4\:127.0.0.1\:80"
```

In the future, `dockerd` will be able to expose the ports automatically. See https://github.com/rootless-containers/rootlesskit/issues/14 .

### Routing ping packets

To route ping packets, you need to set up `net.ipv4.ping_group_range` properly as the root.

```console
$ sudo sh -c "echo 0 2147483647 > /proc/sys/net/ipv4/ping_group_range"
```
34 changes: 34 additions & 0 deletions hack/dockerfile/install/rootlesskit.installer
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/bin/sh


# Commit of github.com/rootless-containers/rootlesskit that
# _install_rootlesskit checks out and builds.
ROOTLESSKIT_COMMIT=d843aadf00d72082fd7a31572ef018d1e792535f

# install_rootlesskit builds and installs rootlesskit.
#   $1  build variant:
#         ""        build with CGO_ENABLED=0 (default)
#         "dynamic" delegate to install_rootlesskit_dynamic
# Any other value prints a usage message on stderr and returns 1.
install_rootlesskit() {
	case "$1" in
		"dynamic")
			install_rootlesskit_dynamic
			return
			;;
		"")
			export CGO_ENABLED=0
			_install_rootlesskit
			;;
		*)
			# Double quotes so that $0 is expanded in the message; an
			# unrecognised variant is an error, so do not report success.
			echo "Usage: $0 [dynamic]" >&2
			return 1
			;;
	esac
}

# install_rootlesskit_dynamic builds rootlesskit with external linking
# (-linkmode=external) and PIE build mode (-buildmode=pie), then installs it
# through _install_rootlesskit.
install_rootlesskit_dynamic() {
	# Export only the linker flags; a trailing bare word on an `export` line
	# would be exported as a variable name rather than run as a command.
	export ROOTLESSKIT_LDFLAGS="-linkmode=external"
	export BUILD_MODE="-buildmode=pie"
	_install_rootlesskit
}

# _install_rootlesskit clones rootlesskit into $GOPATH, checks out
# $ROOTLESSKIT_COMMIT and builds the binary to ${PREFIX}/rootlesskit.
# Reads: GOPATH, PREFIX, ROOTLESSKIT_COMMIT, BUILD_MODE, ROOTLESSKIT_LDFLAGS.
_install_rootlesskit() {
	echo "Install rootlesskit version $ROOTLESSKIT_COMMIT"
	git clone https://github.com/rootless-containers/rootlesskit.git "$GOPATH/src/github.com/rootless-containers/rootlesskit"
	cd "$GOPATH/src/github.com/rootless-containers/rootlesskit"
	git checkout -q "$ROOTLESSKIT_COMMIT"
	# $BUILD_MODE is intentionally unquoted so that it disappears from the
	# command line when empty; the output path is quoted so a PREFIX that
	# contains whitespace stays a single argument.
	go build $BUILD_MODE -ldflags="$ROOTLESSKIT_LDFLAGS" -o "${PREFIX}/rootlesskit" github.com/rootless-containers/rootlesskit/cmd/rootlesskit
}
2 changes: 1 addition & 1 deletion hack/dockerfile/install/runc.installer
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/sh

# When updating RUNC_COMMIT, also update runc in vendor.conf accordingly
RUNC_COMMIT=69663f0bd4b60df09991c08812a60108003fa340
RUNC_COMMIT=2c632d1a2de0192c3f18a2542ccb6f30a8719b1f

install_runc() {
# Do not build with ambient capabilities support
Expand Down
Loading

0 comments on commit d210e20

Please sign in to comment.