From 5f17011693625481d2c382829bfb4a84b5b8d766 Mon Sep 17 00:00:00 2001 From: Maxime Riaud <65339037+misteriaud@users.noreply.github.com> Date: Wed, 18 Dec 2024 11:05:28 -0600 Subject: [PATCH 1/2] Revert "Revert "[ASCII-2587] Migrating TraceAgent to use IPC cert" (#32320)" This reverts commit 6d0ce2d61353b6fca280b60e157c06642515fc9c. --- cmd/agent/subcommands/flare/command.go | 22 ++++++++++++++------- cmd/agent/subcommands/flare/command_test.go | 18 +++++++++++++---- cmd/agent/subcommands/secret/command.go | 2 +- comp/trace/agent/impl/agent.go | 4 ++++ comp/trace/agent/impl/run.go | 3 +++ comp/trace/bundle_test.go | 4 ++++ comp/trace/status/statusimpl/status.go | 2 +- pkg/config/fetcher/from_processes.go | 2 +- pkg/flare/archive.go | 2 +- pkg/trace/api/api_test.go | 18 ++++++++++++++--- pkg/trace/api/debug_server.go | 18 ++++++++++++----- pkg/trace/info/info.go | 6 ++++-- pkg/trace/info/info_test.go | 8 ++++---- 13 files changed, 80 insertions(+), 29 deletions(-) diff --git a/cmd/agent/subcommands/flare/command.go b/cmd/agent/subcommands/flare/command.go index 1b4fb2d3bc1e3..a6da5391ae5d5 100644 --- a/cmd/agent/subcommands/flare/command.go +++ b/cmd/agent/subcommands/flare/command.go @@ -175,10 +175,18 @@ func readProfileData(seconds int) (flare.ProfileData, error) { type pprofGetter func(path string) ([]byte, error) - tcpGet := func(portConfig string) pprofGetter { - pprofURL := fmt.Sprintf("http://127.0.0.1:%d/debug/pprof", pkgconfigsetup.Datadog().GetInt(portConfig)) + tcpGet := func(portConfig string, onHTTPS bool) pprofGetter { + endpoint := url.URL{ + Scheme: "http", + Host: net.JoinHostPort("127.0.0.1", strconv.Itoa(pkgconfigsetup.Datadog().GetInt(portConfig))), + Path: "/debug/pprof", + } + if onHTTPS { + endpoint.Scheme = "https" + } + return func(path string) ([]byte, error) { - return util.DoGet(c, pprofURL+path, util.LeaveConnectionOpen) + return util.DoGet(c, endpoint.String()+path, util.LeaveConnectionOpen) } } @@ -228,15 +236,15 @@ func readProfileData(seconds int) (flare.ProfileData, error) { } agentCollectors := map[string]agentProfileCollector{ - "core": serviceProfileCollector(tcpGet("expvar_port"), seconds), - "security-agent": serviceProfileCollector(tcpGet("security_agent.expvar_port"), seconds), + "core": serviceProfileCollector(tcpGet("expvar_port", false), seconds), + "security-agent": serviceProfileCollector(tcpGet("security_agent.expvar_port", false), seconds), } if pkgconfigsetup.Datadog().GetBool("process_config.enabled") || pkgconfigsetup.Datadog().GetBool("process_config.container_collection.enabled") || pkgconfigsetup.Datadog().GetBool("process_config.process_collection.enabled") { - agentCollectors["process"] = serviceProfileCollector(tcpGet("process_config.expvar_port"), seconds) + agentCollectors["process"] = serviceProfileCollector(tcpGet("process_config.expvar_port", false), seconds) } if pkgconfigsetup.Datadog().GetBool("apm_config.enabled") { @@ -249,7 +257,7 @@ func readProfileData(seconds int) (flare.ProfileData, error) { traceCpusec = 4 } - agentCollectors["trace"] = serviceProfileCollector(tcpGet("apm_config.debug.port"), traceCpusec) + agentCollectors["trace"] = serviceProfileCollector(tcpGet("apm_config.debug.port", true), traceCpusec) } if pkgconfigsetup.SystemProbe().GetBool("system_probe_config.enabled") { diff --git a/cmd/agent/subcommands/flare/command_test.go b/cmd/agent/subcommands/flare/command_test.go index a27304e95b133..4c96cae079aa9 100644 --- a/cmd/agent/subcommands/flare/command_test.go +++ b/cmd/agent/subcommands/flare/command_test.go @@ -29,6 +29,7 @@ type commandTestSuite struct { suite.Suite sysprobeSocketPath string tcpServer *httptest.Server + tcpTLSServer *httptest.Server unixServer *httptest.Server systemProbeServer *httptest.Server } @@ -42,13 +43,17 @@ func (c *commandTestSuite) SetupSuite() { // This should be called by each test that requires them. func (c *commandTestSuite) startTestServers() { t := c.T() - c.tcpServer, c.unixServer, c.systemProbeServer = c.getPprofTestServer() + c.tcpServer, c.tcpTLSServer, c.unixServer, c.systemProbeServer = c.getPprofTestServer() t.Cleanup(func() { if c.tcpServer != nil { c.tcpServer.Close() c.tcpServer = nil } + if c.tcpTLSServer != nil { + c.tcpTLSServer.Close() + c.tcpTLSServer = nil + } if c.unixServer != nil { c.unixServer.Close() c.unixServer = nil @@ -82,12 +87,13 @@ func newMockHandler() http.HandlerFunc { }) } -func (c *commandTestSuite) getPprofTestServer() (tcpServer *httptest.Server, unixServer *httptest.Server, sysProbeServer *httptest.Server) { +func (c *commandTestSuite) getPprofTestServer() (tcpServer *httptest.Server, tcpTLSServer *httptest.Server, unixServer *httptest.Server, sysProbeServer *httptest.Server) { var err error t := c.T() handler := newMockHandler() tcpServer = httptest.NewServer(handler) + tcpTLSServer = httptest.NewTLSServer(handler) if runtime.GOOS == "linux" { unixServer = httptest.NewUnstartedServer(handler) unixServer.Listener, err = net.Listen("unix", c.sysprobeSocketPath) @@ -101,7 +107,7 @@ func (c *commandTestSuite) getPprofTestServer() (tcpServer *httptest.Server, uni sysProbeServer.Start() } - return tcpServer, unixServer, sysProbeServer + return tcpServer, tcpTLSServer, unixServer, sysProbeServer } func TestCommandTestSuite(t *testing.T) { @@ -116,10 +122,14 @@ func (c *commandTestSuite) TestReadProfileData() { require.NoError(t, err) port := u.Port() + u, err = url.Parse(c.tcpTLSServer.URL) + require.NoError(t, err) + httpsPort := u.Port() + mockConfig := configmock.New(t) mockConfig.SetWithoutSource("expvar_port", port) mockConfig.SetWithoutSource("apm_config.enabled", true) - mockConfig.SetWithoutSource("apm_config.debug.port", port) + mockConfig.SetWithoutSource("apm_config.debug.port", httpsPort) mockConfig.SetWithoutSource("apm_config.receiver_timeout", "10") mockConfig.SetWithoutSource("process_config.expvar_port", port) mockConfig.SetWithoutSource("security_agent.expvar_port", port) diff --git a/cmd/agent/subcommands/secret/command.go b/cmd/agent/subcommands/secret/command.go index e24f95ca6d693..d8b2a7048114f 100644 --- a/cmd/agent/subcommands/secret/command.go +++ b/cmd/agent/subcommands/secret/command.go @@ -101,7 +101,7 @@ func traceAgentSecretRefresh(conf config.Component) ([]byte, error) { c := apiutil.GetClient(false) c.Timeout = conf.GetDuration("server_timeout") * time.Second - url := fmt.Sprintf("http://127.0.0.1:%d/secret/refresh", port) + url := fmt.Sprintf("https://127.0.0.1:%d/secret/refresh", port) res, err := apiutil.DoGet(c, url, apiutil.CloseConnection) if err != nil { return nil, fmt.Errorf("could not contact trace-agent: %s", err) diff --git a/comp/trace/agent/impl/agent.go b/comp/trace/agent/impl/agent.go index b3d783288ceb6..de9237dedeea5 100644 --- a/comp/trace/agent/impl/agent.go +++ b/comp/trace/agent/impl/agent.go @@ -24,6 +24,7 @@ import ( "go.opentelemetry.io/collector/pdata/ptrace" "go.uber.org/fx" + "github.com/DataDog/datadog-agent/comp/api/authtoken" "github.com/DataDog/datadog-agent/comp/core/secrets" tagger "github.com/DataDog/datadog-agent/comp/core/tagger/def" "github.com/DataDog/datadog-agent/comp/dogstatsd/statsd" @@ -68,6 +69,7 @@ type dependencies struct { Statsd statsd.Component Tagger tagger.Component Compressor compression.Component + At authtoken.Component } var _ traceagent.Component = (*component)(nil) @@ -93,6 +95,7 @@ type component struct { params *Params tagger tagger.Component telemetryCollector telemetry.TelemetryCollector + at authtoken.Component wg *sync.WaitGroup } @@ -115,6 +118,7 @@ func NewAgent(deps dependencies) (traceagent.Component, error) { params: deps.Params, telemetryCollector: deps.TelemetryCollector, tagger: deps.Tagger, + at: deps.At, wg: &sync.WaitGroup{}, } statsdCl, err := setupMetrics(deps.Statsd, c.config, c.telemetryCollector) diff --git a/comp/trace/agent/impl/run.go b/comp/trace/agent/impl/run.go index 20ecbc6496a1b..f40b36e29ae7f 100644 --- a/comp/trace/agent/impl/run.go +++ b/comp/trace/agent/impl/run.go @@ -98,6 +98,9 @@ func runAgentSidekicks(ag component) error { })) } + // Configure the Trace Agent Debug server to use the IPC certificate + ag.Agent.DebugServer.SetTLSConfig(ag.at.GetTLSServerConfig()) + log.Infof("Trace agent running on host %s", tracecfg.Hostname) if pcfg := profilingConfig(tracecfg); pcfg != nil { if err := profiling.Start(*pcfg); err != nil { diff --git a/comp/trace/bundle_test.go b/comp/trace/bundle_test.go index e9874a4b40077..692e7c255f473 100644 --- a/comp/trace/bundle_test.go +++ b/comp/trace/bundle_test.go @@ -13,6 +13,8 @@ import ( "github.com/stretchr/testify/require" "go.uber.org/fx" + "github.com/DataDog/datadog-agent/comp/api/authtoken/createandfetchimpl" + "github.com/DataDog/datadog-agent/comp/api/authtoken/fetchonlyimpl" "github.com/DataDog/datadog-agent/comp/core" coreconfig "github.com/DataDog/datadog-agent/comp/core/config" log "github.com/DataDog/datadog-agent/comp/core/log/def" @@ -45,6 +47,7 @@ func TestBundleDependencies(t *testing.T) { zstdfx.Module(), taggerfx.Module(tagger.Params{}), fx.Supply(&traceagentimpl.Params{}), + createandfetchimpl.Module(), ) } @@ -75,6 +78,7 @@ func TestMockBundleDependencies(t *testing.T) { fx.Invoke(func(_ traceagent.Component) {}), MockBundle(), taggerfx.Module(tagger.Params{}), + fetchonlyimpl.MockModule(), )) require.NotNil(t, cfg.Object()) diff --git a/comp/trace/status/statusimpl/status.go b/comp/trace/status/statusimpl/status.go index e476ee0281d7a..00a8730b87da8 100644 --- a/comp/trace/status/statusimpl/status.go +++ b/comp/trace/status/statusimpl/status.go @@ -95,7 +95,7 @@ func (s statusProvider) populateStatus() map[string]interface{} { port := s.Config.GetInt("apm_config.debug.port") c := client() - url := fmt.Sprintf("http://localhost:%d/debug/vars", port) + url := fmt.Sprintf("https://localhost:%d/debug/vars", port) resp, err := apiutil.DoGet(c, url, apiutil.CloseConnection) if err != nil { return map[string]interface{}{ diff --git a/pkg/config/fetcher/from_processes.go b/pkg/config/fetcher/from_processes.go index 5b8088f41d970..95c24e283fb46 100644 --- a/pkg/config/fetcher/from_processes.go +++ b/pkg/config/fetcher/from_processes.go @@ -71,7 +71,7 @@ func TraceAgentConfig(config config.Reader) (string, error) { c := util.GetClient(false) c.Timeout = config.GetDuration("server_timeout") * time.Second - ipcAddressWithPort := fmt.Sprintf("http://127.0.0.1:%d/config", port) + ipcAddressWithPort := fmt.Sprintf("https://127.0.0.1:%d/config", port) client := settingshttp.NewClient(c, ipcAddressWithPort, "trace-agent", settingshttp.NewHTTPClientOptions(util.CloseConnection)) return client.FullConfig() diff --git a/pkg/flare/archive.go b/pkg/flare/archive.go index c3e5dffe00ca8..b61c6d9dd6ed2 100644 --- a/pkg/flare/archive.go +++ b/pkg/flare/archive.go @@ -214,7 +214,7 @@ func getExpVar(fb flaretypes.FlareBuilder) error { apmDebugPort := pkgconfigsetup.Datadog().GetInt("apm_config.debug.port") f := filepath.Join("expvar", "trace-agent") - resp, err := http.Get(fmt.Sprintf("http://127.0.0.1:%d/debug/vars", apmDebugPort)) + resp, err := http.Get(fmt.Sprintf("https://127.0.0.1:%d/debug/vars", apmDebugPort)) if err != nil { return fb.AddFile(f, []byte(fmt.Sprintf("Error retrieving vars: %v", err))) } diff --git a/pkg/trace/api/api_test.go b/pkg/trace/api/api_test.go index 9a468dc164487..3f3b2c5b3a20b 100644 --- a/pkg/trace/api/api_test.go +++ b/pkg/trace/api/api_test.go @@ -1039,14 +1039,26 @@ func TestExpvar(t *testing.T) { } c := newTestReceiverConfig() - c.DebugServerPort = 5012 + c.DebugServerPort = 6789 info.InitInfo(c) + + // Starting a TLS httptest server to retrieve tlsCert + ts := httptest.NewTLSServer(http.HandlerFunc(func(_ http.ResponseWriter, _ *http.Request) {})) + tlsConfig := ts.TLS.Clone() + // Setting a client with the proper TLS configuration + client := ts.Client() + ts.Close() + + // Starting Debug Server s := NewDebugServer(c) + s.SetTLSConfig(tlsConfig) + + // Starting the Debug server s.Start() defer s.Stop() - resp, err := http.Get("http://127.0.0.1:5012/debug/vars") - assert.NoError(t, err) + resp, err := client.Get(fmt.Sprintf("https://127.0.0.1:%d/debug/vars", c.DebugServerPort)) + require.NoError(t, err) defer resp.Body.Close() t.Run("read-expvars", func(t *testing.T) { diff --git a/pkg/trace/api/debug_server.go b/pkg/trace/api/debug_server.go index 828d5357330eb..6fd2f39cc011a 100644 --- a/pkg/trace/api/debug_server.go +++ b/pkg/trace/api/debug_server.go @@ -9,6 +9,7 @@ package api import ( "context" + "crypto/tls" "expvar" "fmt" "net" @@ -29,9 +30,10 @@ const ( // DebugServer serves /debug/* endpoints type DebugServer struct { - conf *config.AgentConfig - server *http.Server - mux *http.ServeMux + conf *config.AgentConfig + server *http.Server + mux *http.ServeMux + tlsConfig *tls.Config } // NewDebugServer returns a debug server @@ -53,13 +55,14 @@ func (ds *DebugServer) Start() { WriteTimeout: defaultTimeout, Handler: ds.setupMux(), } - listener, err := net.Listen("tcp", fmt.Sprintf("127.0.0.1:%d", ds.conf.DebugServerPort)) + listener, err := net.Listen("tcp", net.JoinHostPort("127.0.0.1", strconv.Itoa(ds.conf.DebugServerPort))) if err != nil { log.Errorf("Error creating debug server listener: %s", err) return } + tlsListener := tls.NewListener(listener, ds.tlsConfig) go func() { - if err := ds.server.Serve(listener); err != nil && err != http.ErrServerClosed { + if err := ds.server.Serve(tlsListener); err != nil && err != http.ErrServerClosed { log.Errorf("Could not start debug server: %s. Debug server disabled.", err) } }() @@ -82,6 +85,11 @@ func (ds *DebugServer) AddRoute(route string, handler http.Handler) { ds.mux.Handle(route, handler) } +// SetTLSConfig adds the provided tls.Config to the internal http.Server +func (ds *DebugServer) SetTLSConfig(config *tls.Config) { + ds.tlsConfig = config +} + func (ds *DebugServer) setupMux() *http.ServeMux { ds.mux.HandleFunc("/debug/pprof/", pprof.Index) ds.mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline) diff --git a/pkg/trace/info/info.go b/pkg/trace/info/info.go index df86c40587fdc..4198100ee6a5b 100644 --- a/pkg/trace/info/info.go +++ b/pkg/trace/info/info.go @@ -8,6 +8,7 @@ package info import ( "bytes" + "crypto/tls" "encoding/json" "expvar" // automatically publish `/debug/vars` on HTTP port "fmt" @@ -236,8 +237,9 @@ func getProgramBanner(version string) (string, string) { // If error is nil, means the program is running. // If not, it displays a pretty-printed message anyway (for support) func Info(w io.Writer, conf *config.AgentConfig) error { - url := fmt.Sprintf("http://127.0.0.1:%d/debug/vars", conf.DebugServerPort) - client := http.Client{Timeout: 3 * time.Second} + url := fmt.Sprintf("https://127.0.0.1:%d/debug/vars", conf.DebugServerPort) + tr := &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}} + client := http.Client{Timeout: 3 * time.Second, Transport: tr} resp, err := client.Get(url) if err != nil { // OK, here, we can't even make an http call on the agent port, diff --git a/pkg/trace/info/info_test.go b/pkg/trace/info/info_test.go index 01c369cac403e..25b7cb42b13ff 100644 --- a/pkg/trace/info/info_test.go +++ b/pkg/trace/info/info_test.go @@ -63,7 +63,7 @@ func (h *testServerHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { func testServer(t *testing.T, testFile string) *httptest.Server { t.Helper() - server := httptest.NewServer(&testServerHandler{t: t, testFile: testFile}) + server := httptest.NewTLSServer(&testServerHandler{t: t, testFile: testFile}) t.Logf("test server (serving fake yet valid data) listening on %s", server.URL) return server } @@ -94,7 +94,7 @@ func (h *testServerWarningHandler) ServeHTTP(w http.ResponseWriter, r *http.Requ } func testServerWarning(t *testing.T) *httptest.Server { - server := httptest.NewServer(&testServerWarningHandler{t: t}) + server := httptest.NewTLSServer(&testServerWarningHandler{t: t}) t.Logf("test server (serving data containing worrying values) listening on %s", server.URL) return server } @@ -119,7 +119,7 @@ func (h *testServerErrorHandler) ServeHTTP(w http.ResponseWriter, r *http.Reques } func testServerError(t *testing.T) *httptest.Server { - server := httptest.NewServer(&testServerErrorHandler{t: t}) + server := httptest.NewTLSServer(&testServerErrorHandler{t: t}) t.Logf("test server (serving bad data to trigger errors) listening on %s", server.URL) return server } @@ -331,7 +331,7 @@ func TestError(t *testing.T) { assert.Equal(len(lines[1]), len(lines[2])) assert.Equal("", lines[3]) assert.Regexp(regexp.MustCompile(`^ Error: .*$`), lines[4]) - assert.Equal(fmt.Sprintf(" URL: http://127.0.0.1:%d/debug/vars", port), lines[5]) + assert.Equal(fmt.Sprintf(" URL: https://127.0.0.1:%d/debug/vars", port), lines[5]) assert.Equal("", lines[6]) assert.Equal("", lines[7]) } From 58f02460072565caa35e216ced405b07f3555421 Mon Sep 17 00:00:00 2001 From: Maxime Riaud Date: Wed, 18 Dec 2024 11:08:45 -0600 Subject: [PATCH 2/2] fix configsync e2e test --- .../agent-shared-components/config-refresh/config_endpoint.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/new-e2e/tests/agent-shared-components/config-refresh/config_endpoint.go b/test/new-e2e/tests/agent-shared-components/config-refresh/config_endpoint.go index 47c732d2569dc..356f76386e644 100644 --- a/test/new-e2e/tests/agent-shared-components/config-refresh/config_endpoint.go +++ b/test/new-e2e/tests/agent-shared-components/config-refresh/config_endpoint.go @@ -20,7 +20,7 @@ type agentConfigEndpointInfo struct { } func traceConfigEndpoint(port int) agentConfigEndpointInfo { - return agentConfigEndpointInfo{"trace-agent", "http", port, "/config"} + return agentConfigEndpointInfo{"trace-agent", "https", port, "/config"} } func processConfigEndpoint(port int) agentConfigEndpointInfo {