Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reapply "[ASCII-2587] Migrating TraceAgent to use IPC cert" #32355

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 15 additions & 7 deletions cmd/agent/subcommands/flare/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,10 +175,18 @@ func readProfileData(seconds int) (flare.ProfileData, error) {

type pprofGetter func(path string) ([]byte, error)

tcpGet := func(portConfig string) pprofGetter {
pprofURL := fmt.Sprintf("http://127.0.0.1:%d/debug/pprof", pkgconfigsetup.Datadog().GetInt(portConfig))
tcpGet := func(portConfig string, onHTTPS bool) pprofGetter {
endpoint := url.URL{
Scheme: "http",
Host: net.JoinHostPort("127.0.0.1", strconv.Itoa(pkgconfigsetup.Datadog().GetInt(portConfig))),
Path: "/debug/pprof",
}
if onHTTPS {
endpoint.Scheme = "https"
}

return func(path string) ([]byte, error) {
return util.DoGet(c, pprofURL+path, util.LeaveConnectionOpen)
return util.DoGet(c, endpoint.String()+path, util.LeaveConnectionOpen)
}
}

Expand Down Expand Up @@ -228,15 +236,15 @@ func readProfileData(seconds int) (flare.ProfileData, error) {
}

agentCollectors := map[string]agentProfileCollector{
"core": serviceProfileCollector(tcpGet("expvar_port"), seconds),
"security-agent": serviceProfileCollector(tcpGet("security_agent.expvar_port"), seconds),
"core": serviceProfileCollector(tcpGet("expvar_port", false), seconds),
"security-agent": serviceProfileCollector(tcpGet("security_agent.expvar_port", false), seconds),
}

if pkgconfigsetup.Datadog().GetBool("process_config.enabled") ||
pkgconfigsetup.Datadog().GetBool("process_config.container_collection.enabled") ||
pkgconfigsetup.Datadog().GetBool("process_config.process_collection.enabled") {

agentCollectors["process"] = serviceProfileCollector(tcpGet("process_config.expvar_port"), seconds)
agentCollectors["process"] = serviceProfileCollector(tcpGet("process_config.expvar_port", false), seconds)
}

if pkgconfigsetup.Datadog().GetBool("apm_config.enabled") {
Expand All @@ -249,7 +257,7 @@ func readProfileData(seconds int) (flare.ProfileData, error) {
traceCpusec = 4
}

agentCollectors["trace"] = serviceProfileCollector(tcpGet("apm_config.debug.port"), traceCpusec)
agentCollectors["trace"] = serviceProfileCollector(tcpGet("apm_config.debug.port", true), traceCpusec)
}

if pkgconfigsetup.SystemProbe().GetBool("system_probe_config.enabled") {
Expand Down
18 changes: 14 additions & 4 deletions cmd/agent/subcommands/flare/command_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ type commandTestSuite struct {
suite.Suite
sysprobeSocketPath string
tcpServer *httptest.Server
tcpTLSServer *httptest.Server
unixServer *httptest.Server
systemProbeServer *httptest.Server
}
Expand All @@ -42,13 +43,17 @@ func (c *commandTestSuite) SetupSuite() {
// This should be called by each test that requires them.
func (c *commandTestSuite) startTestServers() {
t := c.T()
c.tcpServer, c.unixServer, c.systemProbeServer = c.getPprofTestServer()
c.tcpServer, c.tcpTLSServer, c.unixServer, c.systemProbeServer = c.getPprofTestServer()

t.Cleanup(func() {
if c.tcpServer != nil {
c.tcpServer.Close()
c.tcpServer = nil
}
if c.tcpTLSServer != nil {
c.tcpTLSServer.Close()
c.tcpTLSServer = nil
}
if c.unixServer != nil {
c.unixServer.Close()
c.unixServer = nil
Expand Down Expand Up @@ -82,12 +87,13 @@ func newMockHandler() http.HandlerFunc {
})
}

func (c *commandTestSuite) getPprofTestServer() (tcpServer *httptest.Server, unixServer *httptest.Server, sysProbeServer *httptest.Server) {
func (c *commandTestSuite) getPprofTestServer() (tcpServer *httptest.Server, tcpTLSServer *httptest.Server, unixServer *httptest.Server, sysProbeServer *httptest.Server) {
var err error
t := c.T()

handler := newMockHandler()
tcpServer = httptest.NewServer(handler)
tcpTLSServer = httptest.NewTLSServer(handler)
if runtime.GOOS == "linux" {
unixServer = httptest.NewUnstartedServer(handler)
unixServer.Listener, err = net.Listen("unix", c.sysprobeSocketPath)
Expand All @@ -101,7 +107,7 @@ func (c *commandTestSuite) getPprofTestServer() (tcpServer *httptest.Server, uni
sysProbeServer.Start()
}

return tcpServer, unixServer, sysProbeServer
return tcpServer, tcpTLSServer, unixServer, sysProbeServer
}

func TestCommandTestSuite(t *testing.T) {
Expand All @@ -116,10 +122,14 @@ func (c *commandTestSuite) TestReadProfileData() {
require.NoError(t, err)
port := u.Port()

u, err = url.Parse(c.tcpTLSServer.URL)
require.NoError(t, err)
httpsPort := u.Port()

mockConfig := configmock.New(t)
mockConfig.SetWithoutSource("expvar_port", port)
mockConfig.SetWithoutSource("apm_config.enabled", true)
mockConfig.SetWithoutSource("apm_config.debug.port", port)
mockConfig.SetWithoutSource("apm_config.debug.port", httpsPort)
mockConfig.SetWithoutSource("apm_config.receiver_timeout", "10")
mockConfig.SetWithoutSource("process_config.expvar_port", port)
mockConfig.SetWithoutSource("security_agent.expvar_port", port)
Expand Down
2 changes: 1 addition & 1 deletion cmd/agent/subcommands/secret/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ func traceAgentSecretRefresh(conf config.Component) ([]byte, error) {
c := apiutil.GetClient(false)
c.Timeout = conf.GetDuration("server_timeout") * time.Second

url := fmt.Sprintf("http://127.0.0.1:%d/secret/refresh", port)
url := fmt.Sprintf("https://127.0.0.1:%d/secret/refresh", port)
res, err := apiutil.DoGet(c, url, apiutil.CloseConnection)
if err != nil {
return nil, fmt.Errorf("could not contact trace-agent: %s", err)
Expand Down
4 changes: 4 additions & 0 deletions comp/trace/agent/impl/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"go.opentelemetry.io/collector/pdata/ptrace"
"go.uber.org/fx"

"github.com/DataDog/datadog-agent/comp/api/authtoken"
"github.com/DataDog/datadog-agent/comp/core/secrets"
tagger "github.com/DataDog/datadog-agent/comp/core/tagger/def"
"github.com/DataDog/datadog-agent/comp/dogstatsd/statsd"
Expand Down Expand Up @@ -68,6 +69,7 @@ type dependencies struct {
Statsd statsd.Component
Tagger tagger.Component
Compressor compression.Component
At authtoken.Component
}

var _ traceagent.Component = (*component)(nil)
Expand All @@ -93,6 +95,7 @@ type component struct {
params *Params
tagger tagger.Component
telemetryCollector telemetry.TelemetryCollector
at authtoken.Component
wg *sync.WaitGroup
}

Expand All @@ -115,6 +118,7 @@ func NewAgent(deps dependencies) (traceagent.Component, error) {
params: deps.Params,
telemetryCollector: deps.TelemetryCollector,
tagger: deps.Tagger,
at: deps.At,
wg: &sync.WaitGroup{},
}
statsdCl, err := setupMetrics(deps.Statsd, c.config, c.telemetryCollector)
Expand Down
3 changes: 3 additions & 0 deletions comp/trace/agent/impl/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,9 @@ func runAgentSidekicks(ag component) error {
}))
}

// Configure the Trace Agent Debug server to use the IPC certificate
ag.Agent.DebugServer.SetTLSConfig(ag.at.GetTLSServerConfig())

log.Infof("Trace agent running on host %s", tracecfg.Hostname)
if pcfg := profilingConfig(tracecfg); pcfg != nil {
if err := profiling.Start(*pcfg); err != nil {
Expand Down
4 changes: 4 additions & 0 deletions comp/trace/bundle_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ import (
"github.com/stretchr/testify/require"
"go.uber.org/fx"

"github.com/DataDog/datadog-agent/comp/api/authtoken/createandfetchimpl"
"github.com/DataDog/datadog-agent/comp/api/authtoken/fetchonlyimpl"
"github.com/DataDog/datadog-agent/comp/core"
coreconfig "github.com/DataDog/datadog-agent/comp/core/config"
log "github.com/DataDog/datadog-agent/comp/core/log/def"
Expand Down Expand Up @@ -45,6 +47,7 @@ func TestBundleDependencies(t *testing.T) {
zstdfx.Module(),
taggerfx.Module(tagger.Params{}),
fx.Supply(&traceagentimpl.Params{}),
createandfetchimpl.Module(),
)
}

Expand Down Expand Up @@ -75,6 +78,7 @@ func TestMockBundleDependencies(t *testing.T) {
fx.Invoke(func(_ traceagent.Component) {}),
MockBundle(),
taggerfx.Module(tagger.Params{}),
fetchonlyimpl.MockModule(),
))

require.NotNil(t, cfg.Object())
Expand Down
2 changes: 1 addition & 1 deletion comp/trace/status/statusimpl/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ func (s statusProvider) populateStatus() map[string]interface{} {
port := s.Config.GetInt("apm_config.debug.port")

c := client()
url := fmt.Sprintf("http://localhost:%d/debug/vars", port)
url := fmt.Sprintf("https://localhost:%d/debug/vars", port)
resp, err := apiutil.DoGet(c, url, apiutil.CloseConnection)
if err != nil {
return map[string]interface{}{
Expand Down
2 changes: 1 addition & 1 deletion pkg/config/fetcher/from_processes.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ func TraceAgentConfig(config config.Reader) (string, error) {
c := util.GetClient(false)
c.Timeout = config.GetDuration("server_timeout") * time.Second

ipcAddressWithPort := fmt.Sprintf("http://127.0.0.1:%d/config", port)
ipcAddressWithPort := fmt.Sprintf("https://127.0.0.1:%d/config", port)

client := settingshttp.NewClient(c, ipcAddressWithPort, "trace-agent", settingshttp.NewHTTPClientOptions(util.CloseConnection))
return client.FullConfig()
Expand Down
2 changes: 1 addition & 1 deletion pkg/flare/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ func getExpVar(fb flaretypes.FlareBuilder) error {

apmDebugPort := pkgconfigsetup.Datadog().GetInt("apm_config.debug.port")
f := filepath.Join("expvar", "trace-agent")
resp, err := http.Get(fmt.Sprintf("http://127.0.0.1:%d/debug/vars", apmDebugPort))
resp, err := http.Get(fmt.Sprintf("https://127.0.0.1:%d/debug/vars", apmDebugPort))
if err != nil {
return fb.AddFile(f, []byte(fmt.Sprintf("Error retrieving vars: %v", err)))
}
Expand Down
18 changes: 15 additions & 3 deletions pkg/trace/api/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1042,14 +1042,26 @@ func TestExpvar(t *testing.T) {
}

c := newTestReceiverConfig()
c.DebugServerPort = 5012
c.DebugServerPort = 6789
info.InitInfo(c)

// Starting a TLS httptest server to retrieve tlsCert
ts := httptest.NewTLSServer(http.HandlerFunc(func(_ http.ResponseWriter, _ *http.Request) {}))
tlsConfig := ts.TLS.Clone()
// Setting a client with the proper TLS configuration
client := ts.Client()
ts.Close()

// Starting Debug Server
s := NewDebugServer(c)
s.SetTLSConfig(tlsConfig)

// Starting the Debug server
s.Start()
defer s.Stop()

resp, err := http.Get("http://127.0.0.1:5012/debug/vars")
assert.NoError(t, err)
resp, err := client.Get(fmt.Sprintf("https://127.0.0.1:%d/debug/vars", c.DebugServerPort))
require.NoError(t, err)
defer resp.Body.Close()

t.Run("read-expvars", func(t *testing.T) {
Expand Down
18 changes: 13 additions & 5 deletions pkg/trace/api/debug_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ package api

import (
"context"
"crypto/tls"
"expvar"
"fmt"
"net"
Expand All @@ -29,9 +30,10 @@ const (

// DebugServer serves /debug/* endpoints
type DebugServer struct {
conf *config.AgentConfig
server *http.Server
mux *http.ServeMux
conf *config.AgentConfig
server *http.Server
mux *http.ServeMux
tlsConfig *tls.Config
}

// NewDebugServer returns a debug server
Expand All @@ -53,13 +55,14 @@ func (ds *DebugServer) Start() {
WriteTimeout: defaultTimeout,
Handler: ds.setupMux(),
}
listener, err := net.Listen("tcp", fmt.Sprintf("127.0.0.1:%d", ds.conf.DebugServerPort))
listener, err := net.Listen("tcp", net.JoinHostPort("127.0.0.1", strconv.Itoa(ds.conf.DebugServerPort)))
if err != nil {
log.Errorf("Error creating debug server listener: %s", err)
return
}
tlsListener := tls.NewListener(listener, ds.tlsConfig)
go func() {
if err := ds.server.Serve(listener); err != nil && err != http.ErrServerClosed {
if err := ds.server.Serve(tlsListener); err != nil && err != http.ErrServerClosed {
log.Errorf("Could not start debug server: %s. Debug server disabled.", err)
}
}()
Expand All @@ -82,6 +85,11 @@ func (ds *DebugServer) AddRoute(route string, handler http.Handler) {
ds.mux.Handle(route, handler)
}

// SetTLSConfig adds the provided tls.Config to the internal http.Server
func (ds *DebugServer) SetTLSConfig(config *tls.Config) {
ds.tlsConfig = config
}

func (ds *DebugServer) setupMux() *http.ServeMux {
ds.mux.HandleFunc("/debug/pprof/", pprof.Index)
ds.mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline)
Expand Down
6 changes: 4 additions & 2 deletions pkg/trace/info/info.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ package info

import (
"bytes"
"crypto/tls"
"encoding/json"
"expvar" // automatically publish `/debug/vars` on HTTP port
"fmt"
Expand Down Expand Up @@ -236,8 +237,9 @@ func getProgramBanner(version string) (string, string) {
// If error is nil, means the program is running.
// If not, it displays a pretty-printed message anyway (for support)
func Info(w io.Writer, conf *config.AgentConfig) error {
url := fmt.Sprintf("http://127.0.0.1:%d/debug/vars", conf.DebugServerPort)
client := http.Client{Timeout: 3 * time.Second}
url := fmt.Sprintf("https://127.0.0.1:%d/debug/vars", conf.DebugServerPort)
tr := &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}}
client := http.Client{Timeout: 3 * time.Second, Transport: tr}
resp, err := client.Get(url)
if err != nil {
// OK, here, we can't even make an http call on the agent port,
Expand Down
8 changes: 4 additions & 4 deletions pkg/trace/info/info_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ func (h *testServerHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {

func testServer(t *testing.T, testFile string) *httptest.Server {
t.Helper()
server := httptest.NewServer(&testServerHandler{t: t, testFile: testFile})
server := httptest.NewTLSServer(&testServerHandler{t: t, testFile: testFile})
t.Logf("test server (serving fake yet valid data) listening on %s", server.URL)
return server
}
Expand Down Expand Up @@ -94,7 +94,7 @@ func (h *testServerWarningHandler) ServeHTTP(w http.ResponseWriter, r *http.Requ
}

func testServerWarning(t *testing.T) *httptest.Server {
server := httptest.NewServer(&testServerWarningHandler{t: t})
server := httptest.NewTLSServer(&testServerWarningHandler{t: t})
t.Logf("test server (serving data containing worrying values) listening on %s", server.URL)
return server
}
Expand All @@ -119,7 +119,7 @@ func (h *testServerErrorHandler) ServeHTTP(w http.ResponseWriter, r *http.Reques
}

func testServerError(t *testing.T) *httptest.Server {
server := httptest.NewServer(&testServerErrorHandler{t: t})
server := httptest.NewTLSServer(&testServerErrorHandler{t: t})
t.Logf("test server (serving bad data to trigger errors) listening on %s", server.URL)
return server
}
Expand Down Expand Up @@ -331,7 +331,7 @@ func TestError(t *testing.T) {
assert.Equal(len(lines[1]), len(lines[2]))
assert.Equal("", lines[3])
assert.Regexp(regexp.MustCompile(`^ Error: .*$`), lines[4])
assert.Equal(fmt.Sprintf(" URL: http://127.0.0.1:%d/debug/vars", port), lines[5])
assert.Equal(fmt.Sprintf(" URL: https://127.0.0.1:%d/debug/vars", port), lines[5])
assert.Equal("", lines[6])
assert.Equal("", lines[7])
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ type agentConfigEndpointInfo struct {
}

func traceConfigEndpoint(port int) agentConfigEndpointInfo {
return agentConfigEndpointInfo{"trace-agent", "http", port, "/config"}
return agentConfigEndpointInfo{"trace-agent", "https", port, "/config"}
}

func processConfigEndpoint(port int) agentConfigEndpointInfo {
Expand Down
Loading