Skip to content

Commit

Permalink
Convert poll/select to operate more directly on linux.PollFD
Browse files Browse the repository at this point in the history
Currently, doPoll copies the user struct pollfd array into a
[]syscalls.PollFD, which contains internal kdefs.FD and
waiter.EventMask types. While these are currently binary-compatible with
the Linux versions, we generally discourage copying directly to internal
types (someone may inadvertently change kdefs.FD to uint64).

Instead, copy directly to a []linux.PollFD, which will certainly be
binary compatible. Most of syscalls/polling.go is included directly into
syscalls/linux/sys_poll.go, as it can then operate directly on
linux.PollFD. The additional syscalls.PollFD type is providing little
value.

I've also added explicit conversion functions for waiter.EventMask,
which leaves it free to adopt a binary format different from Linux's.

PiperOrigin-RevId: 244042947
Change-Id: I24e5b642002a32b3afb95a9dcb80d4acd1288abf
  • Loading branch information
prattmic authored and shentubot committed Apr 17, 2019
1 parent e091b4e commit 08d99c5
Show file tree
Hide file tree
Showing 8 changed files with 153 additions and 167 deletions.
6 changes: 4 additions & 2 deletions pkg/fdnotifier/fdnotifier.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.

// +build linux

// Package fdnotifier contains an adapter that translates IO events (e.g., a
// file became readable/writable) from native FDs to the notifications in the
// waiter package. It uses epoll in edge-triggered mode to receive notifications
Expand Down Expand Up @@ -70,7 +72,7 @@ func (n *notifier) waitFD(fd int32, fi *fdInfo, mask waiter.EventMask) error {
}

e := syscall.EpollEvent{
Events: uint32(mask) | -syscall.EPOLLET,
Events: mask.ToLinux() | -syscall.EPOLLET,
Fd: fd,
}

Expand Down Expand Up @@ -155,7 +157,7 @@ func (n *notifier) waitAndNotify() error {
n.mu.Lock()
for i := 0; i < v; i++ {
if fi, ok := n.fdMap[e[i].Fd]; ok {
fi.queue.Notify(waiter.EventMask(e[i].Events))
fi.queue.Notify(waiter.EventMaskFromLinux(e[i].Events))
}
}
n.mu.Unlock()
Expand Down
6 changes: 4 additions & 2 deletions pkg/fdnotifier/poll_unsafe.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.

// +build linux

package fdnotifier

import (
Expand All @@ -30,7 +32,7 @@ func NonBlockingPoll(fd int32, mask waiter.EventMask) waiter.EventMask {
revents int16
}{
fd: fd,
events: int16(mask),
events: int16(mask.ToLinux()),
}

for {
Expand All @@ -51,7 +53,7 @@ func NonBlockingPoll(fd int32, mask waiter.EventMask) waiter.EventMask {
}

// Otherwise we got the ready events in the revents field.
return waiter.EventMask(e.revents)
return waiter.EventMaskFromLinux(uint32(e.revents))
}
}

Expand Down
8 changes: 4 additions & 4 deletions pkg/sentry/socket/rpcinet/notifier/notifier.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ func (n *Notifier) waitFD(fd uint32, fi *fdInfo, mask waiter.EventMask) error {
}

e := pb.EpollEvent{
Events: uint32(mask) | -syscall.EPOLLET,
Events: mask.ToLinux() | -syscall.EPOLLET,
Fd: fd,
}

Expand Down Expand Up @@ -178,7 +178,7 @@ func (n *Notifier) waitAndNotify() error {
n.mu.Lock()
for _, e := range res.(*pb.EpollWaitResponse_Events).Events.Events {
if fi, ok := n.fdMap[e.Fd]; ok {
fi.queue.Notify(waiter.EventMask(e.Events))
fi.queue.Notify(waiter.EventMaskFromLinux(e.Events))
}
}
n.mu.Unlock()
Expand Down Expand Up @@ -214,7 +214,7 @@ func (n *Notifier) HasFD(fd uint32) bool {
// although the syscall is non-blocking.
func (n *Notifier) NonBlockingPoll(fd uint32, mask waiter.EventMask) waiter.EventMask {
for {
id, c := n.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Poll{&pb.PollRequest{Fd: fd, Events: uint32(mask)}}}, false /* ignoreResult */)
id, c := n.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Poll{&pb.PollRequest{Fd: fd, Events: mask.ToLinux()}}}, false /* ignoreResult */)
<-c

res := n.rpcConn.Request(id).Result.(*pb.SyscallResponse_Poll).Poll.Result
Expand All @@ -225,6 +225,6 @@ func (n *Notifier) NonBlockingPoll(fd uint32, mask waiter.EventMask) waiter.Even
return mask
}

return waiter.EventMask(res.(*pb.PollResponse_Events).Events)
return waiter.EventMaskFromLinux(res.(*pb.PollResponse_Events).Events)
}
}
2 changes: 0 additions & 2 deletions pkg/sentry/syscalls/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,13 @@ go_library(
name = "syscalls",
srcs = [
"epoll.go",
"polling.go",
"syscalls.go",
],
importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls",
visibility = ["//:sandbox"],
deps = [
"//pkg/abi/linux",
"//pkg/sentry/arch",
"//pkg/sentry/fs",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/epoll",
"//pkg/sentry/kernel/kdefs",
Expand Down
2 changes: 1 addition & 1 deletion pkg/sentry/syscalls/linux/sys_epoll.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ func EpollCtl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
flags |= epoll.EdgeTriggered
}

mask = waiter.EventMask(e.Events)
mask = waiter.EventMaskFromLinux(e.Events)
data[0] = e.Fd
data[1] = e.Pad
}
Expand Down
133 changes: 120 additions & 13 deletions pkg/sentry/syscalls/linux/sys_poll.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ import (

"gvisor.googlesource.com/gvisor/pkg/abi/linux"
"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
"gvisor.googlesource.com/gvisor/pkg/sentry/fs"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
"gvisor.googlesource.com/gvisor/pkg/sentry/limits"
"gvisor.googlesource.com/gvisor/pkg/sentry/syscalls"
"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
"gvisor.googlesource.com/gvisor/pkg/syserror"
"gvisor.googlesource.com/gvisor/pkg/waiter"
Expand All @@ -37,23 +37,130 @@ const fileCap = 1024 * 1024
const (
// selectReadEvents is analogous to the Linux kernel's
// fs/select.c:POLLIN_SET.
selectReadEvents = waiter.EventIn | waiter.EventHUp | waiter.EventErr
selectReadEvents = linux.POLLIN | linux.POLLHUP | linux.POLLERR

// selectWriteEvents is analogous to the Linux kernel's
// fs/select.c:POLLOUT_SET.
selectWriteEvents = waiter.EventOut | waiter.EventErr
selectWriteEvents = linux.POLLOUT | linux.POLLERR

// selectExceptEvents is analogous to the Linux kernel's
// fs/select.c:POLLEX_SET.
selectExceptEvents = waiter.EventPri
selectExceptEvents = linux.POLLPRI
)

// pollState tracks the associated file descriptor and waiter of a PollFD.
//
// One pollState is kept per PollFD entry. It is only populated by
// initReadiness when that entry is registered for event notifications;
// otherwise it stays at its zero value and releaseState skips it.
type pollState struct {
// file is the file being polled. A non-nil value means a reference is
// held and waiter is registered with the file.
file *fs.File
// waiter is the entry registered with file for event notifications.
waiter waiter.Entry
}

// initReadiness computes the current ready mask of the file named by pfd.FD
// and stores it, restricted to the requested events, in pfd.REvents.
//
// A negative pfd.FD yields REvents == 0; an FD for which no file is found
// yields linux.POLLNVAL.
//
// When ch is non-nil, the file is additionally registered for notifications
// on the requested events through state.waiter, and the file reference is
// handed over to state.file. When ch is nil, the reference is dropped before
// returning and state is left untouched.
func initReadiness(t *kernel.Task, pfd *linux.PollFD, state *pollState, ch chan struct{}) {
	if pfd.FD < 0 {
		pfd.REvents = 0
		return
	}

	f := t.FDMap().GetFile(kdefs.FD(pfd.FD))
	if f == nil {
		pfd.REvents = linux.POLLNVAL
		return
	}

	// The requested events, converted once to the internal representation.
	wanted := waiter.EventMaskFromLinux(uint32(pfd.Events))

	if ch != nil {
		// Keep the reference in state; it is released together with
		// the registration later on.
		state.file = f
		state.waiter, _ = waiter.NewChannelEntry(ch)
		f.EventRegister(&state.waiter, wanted)
	} else {
		// Not registering: the reference is only needed for the
		// readiness check below.
		defer f.DecRef()
	}

	ready := f.Readiness(wanted)
	pfd.REvents = int16(ready.ToLinux()) & pfd.Events
}

// releaseState undoes the work of initReadiness for every entry in "state"
// that holds a file: the waiter is unregistered and the file reference is
// dropped. Entries without a file are skipped.
func releaseState(state []pollState) {
	for idx := range state {
		s := &state[idx]
		if s.file == nil {
			continue
		}
		s.file.EventUnregister(&s.waiter)
		s.file.DecRef()
	}
}

// pollBlock polls the PollFDs in "pfd", filling in their REvents fields.
//
// A zero "timeout" never blocks, a negative "timeout" blocks without a
// deadline, and a positive "timeout" bounds the wait.
//
// It returns the remaining timeout as reported by t.BlockWithTimeout, the
// number of entries with a non-zero REvents, and an error. Running out of
// time is not an error: it is reported as a count of zero with a nil error.
// Any other error from blocking is returned as is with a count of zero.
func pollBlock(t *kernel.Task, pfd []linux.PollFD, timeout time.Duration) (time.Duration, uintptr, error) {
	// A notification channel is only needed if the call is allowed to block.
	var notify chan struct{}
	if timeout != 0 {
		notify = make(chan struct{}, 1)
	}

	// Compute the initial ready masks of all entries. Entries are registered
	// for notifications only while nothing has been found ready: after the
	// first ready entry the channel is cleared, so the remaining entries are
	// merely inspected.
	states := make([]pollState, len(pfd))
	defer releaseState(states)

	var ready uintptr
	for idx := range pfd {
		initReadiness(t, &pfd[idx], &states[idx], notify)
		if pfd[idx].REvents == 0 {
			continue
		}
		ready++
		notify = nil
	}

	if timeout == 0 {
		return timeout, ready, nil
	}

	// Negative timeouts wait without a deadline.
	haveDeadline := !(timeout < 0)

	for ready == 0 {
		var err error
		// Sleep until notified, timed out or interrupted.
		if timeout, err = t.BlockWithTimeout(notify, haveDeadline, timeout); err != nil {
			if err == syserror.ETIMEDOUT {
				return timeout, 0, nil
			}
			return timeout, 0, err
		}

		// Re-check every registered file. If none turns out to be ready
		// the wake-up was spurious and the loop blocks again with the
		// remaining timeout.
		for idx := range states {
			f := states[idx].file
			if f == nil {
				continue
			}

			mask := f.Readiness(waiter.EventMaskFromLinux(uint32(pfd[idx].Events)))
			if revents := int16(mask.ToLinux()) & pfd[idx].Events; revents != 0 {
				pfd[idx].REvents = revents
				ready++
			}
		}
	}

	return timeout, ready, nil
}

func doPoll(t *kernel.Task, pfdAddr usermem.Addr, nfds uint, timeout time.Duration) (time.Duration, uintptr, error) {
if uint64(nfds) > t.ThreadGroup().Limits().GetCapped(limits.NumberOfFiles, fileCap) {
return timeout, 0, syserror.EINVAL
}

pfd := make([]syscalls.PollFD, nfds)
pfd := make([]linux.PollFD, nfds)
if nfds > 0 {
if _, err := t.CopyIn(pfdAddr, &pfd); err != nil {
return timeout, 0, err
Expand All @@ -65,9 +172,9 @@ func doPoll(t *kernel.Task, pfdAddr usermem.Addr, nfds uint, timeout time.Durati
// polling, changing event masks here is an application-visible difference.
// (Linux also doesn't copy out event masks at all, only revents.)
for i := range pfd {
pfd[i].Events |= waiter.EventHUp | waiter.EventErr
pfd[i].Events |= linux.POLLHUP | linux.POLLERR
}
remainingTimeout, n, err := syscalls.Poll(t, pfd, timeout)
remainingTimeout, n, err := pollBlock(t, pfd, timeout)
err = syserror.ConvertIntr(err, syserror.EINTR)

// The poll entries are copied out regardless of whether
Expand Down Expand Up @@ -136,8 +243,8 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Add
}

// Build the PollFD array.
pfd := make([]syscalls.PollFD, 0, fdCount)
fd := kdefs.FD(0)
pfd := make([]linux.PollFD, 0, fdCount)
var fd int32
for i := 0; i < byteCount; i++ {
rV, wV, eV := r[i], w[i], e[i]
v := rV | wV | eV
Expand All @@ -148,13 +255,13 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Add
// immediately to ensure we don't leak. Note, another thread
// might be about to close fd. This is racy, but that's
// OK. Linux is racy in the same way.
file := t.FDMap().GetFile(fd)
file := t.FDMap().GetFile(kdefs.FD(fd))
if file == nil {
return 0, syserror.EBADF
}
file.DecRef()

mask := waiter.EventMask(0)
var mask int16
if (rV & m) != 0 {
mask |= selectReadEvents
}
Expand All @@ -167,7 +274,7 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Add
mask |= selectExceptEvents
}

pfd = append(pfd, syscalls.PollFD{
pfd = append(pfd, linux.PollFD{
FD: fd,
Events: mask,
})
Expand All @@ -179,7 +286,7 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Add
}

// Do the syscall, then count the number of bits set.
_, _, err := syscalls.Poll(t, pfd, timeout)
_, _, err := pollBlock(t, pfd, timeout)
if err != nil {
return 0, syserror.ConvertIntr(err, syserror.EINTR)
}
Expand Down
Loading

0 comments on commit 08d99c5

Please sign in to comment.