Skip to content

Commit

Permalink
crio status: add goroutines subcommand
Browse files Browse the repository at this point in the history
The subcommand can be used to query the CRI-O goroutines in the same way
as writing them to disk using `SIGUSR1`. This also allows querying the
goroutines using the HTTP endpoint:

```shell
sudo curl --unix-socket /var/run/crio/crio.sock http://crio/debug/goroutines
```

Signed-off-by: Sascha Grunert <sgrunert@redhat.com>
  • Loading branch information
saschagrunert committed Oct 22, 2024
1 parent c7cabff commit 2665ada
Show file tree
Hide file tree
Showing 8 changed files with 93 additions and 39 deletions.
15 changes: 8 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -251,13 +251,14 @@ The following API entry points are currently supported:

<!-- markdownlint-disable MD013 -->

| Path | Content-Type | Description |
| ----------------- | ------------------ | ---------------------------------------------------------------------------------- |
| `/info` | `application/json` | General information about the runtime, like `storage_driver` and `storage_root`. |
| `/containers/:id` | `application/json` | Dedicated container information, like `name`, `pid` and `image`. |
| `/config` | `application/toml` | The complete TOML configuration (defaults to `/etc/crio/crio.conf`) used by CRI-O. |
| `/pause/:id` | `application/json` | Pause a running container. |
| `/unpause/:id` | `application/json` | Unpause a paused container. |
| Path | Content-Type | Description |
| ------------------- | ------------------ | ---------------------------------------------------------------------------------- |
| `/info` | `application/json` | General information about the runtime, like `storage_driver` and `storage_root`. |
| `/containers/:id` | `application/json` | Dedicated container information, like `name`, `pid` and `image`. |
| `/config` | `application/toml` | The complete TOML configuration (defaults to `/etc/crio/crio.conf`) used by CRI-O. |
| `/pause/:id` | `application/json` | Pause a running container. |
| `/unpause/:id` | `application/json` | Unpause a paused container. |
| `/debug/goroutines` | `text/plain` | Print the goroutine stacks. |

<!-- markdownlint-enable MD013 -->

Expand Down
4 changes: 3 additions & 1 deletion completions/fish/crio.fish
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

function __fish_crio_no_subcommand --description 'Test if there has been any subcommand yet'
for i in (commandline -opc)
if contains -- $i check complete completion help h config man markdown md status config c containers container cs s info i version wipe help h
if contains -- $i check complete completion help h config man markdown md status config c containers container cs s info i goroutines g version wipe help h
return 1
end
end
Expand Down Expand Up @@ -221,6 +221,8 @@ complete -r -c crio -n '__fish_seen_subcommand_from status' -a 'containers conta
complete -c crio -n '__fish_seen_subcommand_from containers container cs s' -f -l id -s i -r -d 'the container ID'
complete -c crio -n '__fish_seen_subcommand_from info i' -f -l help -s h -d 'show help'
complete -r -c crio -n '__fish_seen_subcommand_from status' -a 'info i' -d 'Retrieve generic information about CRI-O, such as the cgroup and storage driver.'
complete -c crio -n '__fish_seen_subcommand_from goroutines g' -f -l help -s h -d 'show help'
complete -r -c crio -n '__fish_seen_subcommand_from status' -a 'goroutines g' -d 'Display the goroutine stack.'
complete -c crio -n '__fish_seen_subcommand_from version' -f -l help -s h -d 'show help'
complete -r -c crio -n '__fish_crio_no_subcommand' -a 'version' -d 'display detailed version information'
complete -c crio -n '__fish_seen_subcommand_from version' -f -l json -s j -d 'print JSON instead of text'
Expand Down
4 changes: 4 additions & 0 deletions docs/crio.8.md
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,10 @@ Display detailed information about the provided container ID.

Retrieve generic information about CRI-O, such as the cgroup and storage driver.

### goroutines, g

Display the goroutine stack.

## version

display detailed version information
Expand Down
52 changes: 27 additions & 25 deletions internal/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@ package client

import (
"context"
"encoding/json"
"fmt"
"io"
"net"
"net/http"
"syscall"
"time"

json "github.com/json-iterator/go"

"github.com/cri-o/cri-o/pkg/types"
"github.com/cri-o/cri-o/server"
)
Expand All @@ -24,6 +23,7 @@ type CrioClient interface {
DaemonInfo(context.Context) (types.CrioInfo, error)
ContainerInfo(context.Context, string) (*types.ContainerInfo, error)
ConfigInfo(context.Context) (string, error)
GoRoutinesInfo(context.Context) (string, error)
}

type crioClientImpl struct {
Expand Down Expand Up @@ -58,7 +58,7 @@ func New(crioSocketPath string) (CrioClient, error) {
}, nil
}

func (c *crioClientImpl) getRequest(ctx context.Context, path string) (*http.Request, error) {
func (c *crioClientImpl) doGetRequest(ctx context.Context, path string) ([]byte, error) {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, path, http.NoBody)
if err != nil {
return nil, err
Expand All @@ -68,57 +68,59 @@ func (c *crioClientImpl) getRequest(ctx context.Context, path string) (*http.Req
req.Host = "crio"
req.URL.Host = c.crioSocketPath
req.URL.Scheme = "http"
return req, nil

resp, err := c.client.Do(req)
if err != nil {
return nil, fmt.Errorf("do get request: %w", err)
}

defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("read body: %w", err)
}

return body, nil
}

// DaemonInfo returns the CRI-O daemon info from the CRI-O
// info endpoint.
func (c *crioClientImpl) DaemonInfo(ctx context.Context) (types.CrioInfo, error) {
info := types.CrioInfo{}
req, err := c.getRequest(ctx, server.InspectInfoEndpoint)
if err != nil {
return info, err
}
resp, err := c.client.Do(req)
body, err := c.doGetRequest(ctx, server.InspectInfoEndpoint)
if err != nil {
return info, err
}
defer resp.Body.Close()
err = json.NewDecoder(resp.Body).Decode(&info)
err = json.Unmarshal(body, &info)
return info, err
}

// ContainerInfo returns container info by querying
// the cri-o container endpoint.
func (c *crioClientImpl) ContainerInfo(ctx context.Context, id string) (*types.ContainerInfo, error) {
req, err := c.getRequest(ctx, server.InspectContainersEndpoint+"/"+id)
if err != nil {
return nil, err
}
resp, err := c.client.Do(req)
body, err := c.doGetRequest(ctx, server.InspectContainersEndpoint+"/"+id)
if err != nil {
return nil, err
}
defer resp.Body.Close()
cInfo := types.ContainerInfo{}
if err := json.NewDecoder(resp.Body).Decode(&cInfo); err != nil {
if err := json.Unmarshal(body, &cInfo); err != nil {
return nil, err
}
return &cInfo, nil
}

// ConfigInfo returns current config as TOML string.
func (c *crioClientImpl) ConfigInfo(ctx context.Context) (string, error) {
req, err := c.getRequest(ctx, server.InspectConfigEndpoint)
body, err := c.doGetRequest(ctx, server.InspectConfigEndpoint)
if err != nil {
return "", err
}
resp, err := c.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
return string(body), nil
}

// GoRoutinesInfo returns the goroutine stacks as a string.
func (c *crioClientImpl) GoRoutinesInfo(ctx context.Context) (string, error) {
body, err := c.doGetRequest(ctx, server.InspectGoRoutinesEndpoint)
if err != nil {
return "", err
}
Expand Down
25 changes: 23 additions & 2 deletions internal/criocli/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,18 @@ var StatusCommand = &cli.Command{
Aliases: []string{"i"},
Name: "info",
Usage: "Retrieve generic information about CRI-O, such as the cgroup and storage driver.",
}, {
Action: goroutines,
Aliases: []string{"g"},
Name: "goroutines",
Usage: "Display the goroutine stack.",
}},
}

func crioClient(c *cli.Context) (client.CrioClient, error) {
return client.New(c.String(socketArg))

Check warning on line 60 in internal/criocli/status.go

View check run for this annotation

Codecov / codecov/patch

internal/criocli/status.go#L59-L60

Added lines #L59 - L60 were not covered by tests
}

func configSubCommand(c *cli.Context) error {
crioClient, err := crioClient(c)
if err != nil {
Expand Down Expand Up @@ -135,6 +144,18 @@ func info(c *cli.Context) error {
return nil
}

func crioClient(c *cli.Context) (client.CrioClient, error) {
return client.New(c.String(socketArg))
func goroutines(c *cli.Context) error {
crioClient, err := crioClient(c)
if err != nil {
return err
}

Check warning on line 151 in internal/criocli/status.go

View check run for this annotation

Codecov / codecov/patch

internal/criocli/status.go#L147-L151

Added lines #L147 - L151 were not covered by tests

goroutineStack, err := crioClient.GoRoutinesInfo(c.Context)
if err != nil {
return err
}

Check warning on line 156 in internal/criocli/status.go

View check run for this annotation

Codecov / codecov/patch

internal/criocli/status.go#L153-L156

Added lines #L153 - L156 were not covered by tests

fmt.Print(goroutineStack)

return nil

Check warning on line 160 in internal/criocli/status.go

View check run for this annotation

Codecov / codecov/patch

internal/criocli/status.go#L158-L160

Added lines #L158 - L160 were not covered by tests
}
10 changes: 10 additions & 0 deletions server/inspect.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/cri-o/cri-o/internal/log"
"github.com/cri-o/cri-o/internal/oci"
"github.com/cri-o/cri-o/pkg/types"
"github.com/cri-o/cri-o/utils"
)

func (s *Server) getIDMappingsInfo() types.IDMappings {
Expand Down Expand Up @@ -124,6 +125,7 @@ const (
InspectInfoEndpoint = "/info"
InspectPauseEndpoint = "/pause"
InspectUnpauseEndpoint = "/unpause"
InspectGoRoutinesEndpoint = "/debug/goroutines"
)

// GetExtendInterfaceMux returns the mux used to serve extend interface requests.
Expand Down Expand Up @@ -244,6 +246,14 @@ func (s *Server) GetExtendInterfaceMux(enableProfile bool) *chi.Mux {
}
}))

mux.Get(InspectGoRoutinesEndpoint, http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
w.Header().Set("Content-Type", "text/plain")
if err := utils.WriteGoroutineStacksTo(w); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}

Check warning on line 254 in server/inspect.go

View check run for this annotation

Codecov / codecov/patch

server/inspect.go#L250-L254

Added lines #L250 - L254 were not covered by tests
}))

// Add pprof handlers
if enableProfile {
mux.Get("/debug/pprof/cmdline", http.HandlerFunc(pprof.Cmdline))
Expand Down
5 changes: 5 additions & 0 deletions test/status.bats
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,8 @@ function teardown() {
@test "should fail to retrieve the container with invalid socket" {
run -1 "${CRIO_BINARY_PATH}" status --socket wrong.sock s
}

@test "status should succeed to retrieve the goroutines" {
run -0 "${CRIO_BINARY_PATH}" status --socket="${CRIO_SOCKET}" goroutines
[[ "$output" == *"goroutine"* ]]
}
17 changes: 13 additions & 4 deletions utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,16 +105,25 @@ func WriteGoroutineStacksToFile(path string) error {
}
defer f.Close()

if err := WriteGoroutineStacksTo(f); err != nil {
return err
}

Check warning on line 110 in utils/utils.go

View check run for this annotation

Codecov / codecov/patch

utils/utils.go#L109-L110

Added lines #L109 - L110 were not covered by tests

return f.Sync()
}

// WriteGoroutineStacksTo writes goroutine stacks
// to the specified writer.
func WriteGoroutineStacksTo(f io.Writer) error {
// Print goroutines stacks using the same format
// as if an unrecoverable panic would occur. The
// internal buffer is 64 MiB, which hopefully
// will be sufficient.
err = pprof.Lookup("goroutine").WriteTo(f, 2)
if err != nil {
return err
if err := pprof.Lookup("goroutine").WriteTo(f, 2); err != nil {
return fmt.Errorf("write goroutines: %w", err)

Check warning on line 123 in utils/utils.go

View check run for this annotation

Codecov / codecov/patch

utils/utils.go#L123

Added line #L123 was not covered by tests
}

return f.Sync()
return nil
}

// GenerateID generates a random unique id.
Expand Down

0 comments on commit 2665ada

Please sign in to comment.