Skip to content

Commit

Permalink
runc-shim: refuse to start execs after init exits
Browse files Browse the repository at this point in the history
The runc task state machine prevents execs from being created after the
init process has exited, but there are no guards against starting a
created exec after the init process has exited. That leaves a small
window for starting an exec to race our handling of the init process
exiting. Normally this is not an issue in practice: the kernel will
atomically kill all processes in a PID namespace when its "init" process
terminates, and will not allow new processes to fork(2) into the PID
namespace afterwards. Therefore the racing exec is guaranteed by the
kernel to not be running after the init process terminates. On the other
hand, when the container does not have a private PID namespace (i.e. the
container's init process is not the "init" process of the container's
PID namespace), the kernel does not automatically kill other container
processes on init exit and will happily allow runc to start an exec
process at any time. It is the runc shim's responsibility to clean up
the container when the init process exits in this situation by killing
all the container's remaining processes. Block execs from being started
after the container's init process has exited to prevent the processes
from leaking, and to avoid violating the task service's assumption that
an exec can be running iff the init process is also running.

Signed-off-by: Cory Snider <csnider@mirantis.com>
(cherry picked from commit e735791)
Signed-off-by: Laura Brehm <laurabrehm@hey.com>
  • Loading branch information
corhere authored and laurazard committed Sep 6, 2024
1 parent 760935e commit 686c694
Showing 1 changed file with 17 additions and 0 deletions.
17 changes: 17 additions & 0 deletions runtime/v2/runc/task/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ func NewTaskService(ctx context.Context, publisher shim.Publisher, sd shutdown.S
containers: make(map[string]*runc.Container),
running: make(map[int][]containerProcess),
pendingExecs: make(map[*runc.Container]int),
execable: make(map[*runc.Container]bool),
exitSubscribers: make(map[*map[int][]runcC.Exit]struct{}),
}
go s.processExits()
Expand Down Expand Up @@ -117,6 +118,12 @@ type service struct {
lifecycleMu sync.Mutex
running map[int][]containerProcess // pid -> running process, guarded by lifecycleMu
pendingExecs map[*runc.Container]int // container -> num pending execs, guarded by lifecycleMu
// container -> execs can be started, guarded by lifecycleMu.
// Execs can be started if the container's init process (read: pid, not [process.Init])
// has been started and not yet reaped by the shim.
// Note that this flag gets updated before the container's [process.Init.Status]
// is transitioned to "stopped".
execable map[*runc.Container]bool
// Subscriptions to exits for PIDs. Adding/deleting subscriptions and
// dereferencing the subscription pointers must only be done while holding
// lifecycleMu.
Expand Down Expand Up @@ -230,6 +237,9 @@ func (s *service) preStart(c *runc.Container) (handleStarted func(*runc.Containe
Container: c,
Process: p,
})
if init {
s.execable[c] = true
}
s.lifecycleMu.Unlock()
}
}
Expand Down Expand Up @@ -304,6 +314,10 @@ func (s *service) Start(ctx context.Context, r *taskAPI.StartRequest) (*taskAPI.
if r.ExecID == "" {
cinit = container
} else {
if !s.execable[container] {
s.lifecycleMu.Unlock()
return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container %s init process is not running", container.ID)
}
s.pendingExecs[container]++
}
handleStarted, cleanup := s.preStart(cinit)
Expand Down Expand Up @@ -679,6 +693,9 @@ func (s *service) processExits() {
var cps, skipped []containerProcess
for _, cp := range s.running[e.Pid] {
_, init := cp.Process.(*process.Init)
if init {
delete(s.execable, cp.Container)
}
if init && s.pendingExecs[cp.Container] != 0 {
// This exit relates to a container for which we have pending execs. In
// order to ensure order between execs and the init process for a given
Expand Down

0 comments on commit 686c694

Please sign in to comment.