From 2613fb99051e212603102d2e1cfd908115ca6245 Mon Sep 17 00:00:00 2001 From: Elena Grahovac Date: Fri, 21 Jun 2024 13:25:25 +0200 Subject: [PATCH 01/30] wip --- cmd/ferretdb/main.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmd/ferretdb/main.go b/cmd/ferretdb/main.go index 6072d4ad16c6..cc673f03ae49 100644 --- a/cmd/ferretdb/main.go +++ b/cmd/ferretdb/main.go @@ -381,6 +381,8 @@ func run() { logger := setupLogger(stateProvider, cli.Log.Format) + // todo setup otel + checkFlags(logger) if _, err := maxprocs.Set(maxprocs.Logger(logger.Sugar().Debugf)); err != nil { From 7f7859312b3d753389469577e6dc8434aaa858e8 Mon Sep 17 00:00:00 2001 From: Elena Grahovac Date: Mon, 24 Jun 2024 14:52:44 +0200 Subject: [PATCH 02/30] wip --- cmd/ferretdb/main.go | 33 +++++++++++++++++++- internal/util/observability/observability.go | 26 ++++++++++----- 2 files changed, 50 insertions(+), 9 deletions(-) diff --git a/cmd/ferretdb/main.go b/cmd/ferretdb/main.go index cc673f03ae49..5fa97ea7f347 100644 --- a/cmd/ferretdb/main.go +++ b/cmd/ferretdb/main.go @@ -26,6 +26,8 @@ import ( "sync" "time" + "github.com/FerretDB/FerretDB/internal/util/observability" + "github.com/alecthomas/kong" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/expfmt" @@ -125,6 +127,13 @@ var cli struct { Package string `default:"" help:"Telemetry: custom package type."` } `embed:"" prefix:"telemetry-"` } `embed:"" prefix:"test-"` + + OTEL struct { + Enable bool `default:"false" help:"Enable OpenTelemetry."` + OTLPEndpoint string `default:"127.0.0.1:4318" help:"OTLP exporter endpoint."` + TracesSampler string `default:"always_on" help:"Traces sampler settings, see https://opentelemetry.io/docs/languages/sdk-configuration/general/#otel_traces_sampler." enum:"always_on,always_off,traceidratio"` + TracesSamplerArg string `default:"" help:"Traces sampler argument for traceidratio strategy."` + } `embed:"" prefix:"otel-"` } // The postgreSQLFlags struct represents flags that are used by the "postgresql" backend. @@ -309,6 +318,20 @@ func setupLogger(stateProvider *state.Provider, format string) *zap.Logger { return l } +// setupTracer sets up OpenTelemetry exporter. +func setupTracer() observability.ShutdownFunc { + if !cli.OTEL.Enable { + return nil + } + + shutdown, err := observability.SetupOtel("ferretdb", cli.OTEL.OTLPEndpoint) + if err != nil { + log.Fatalf("Failed to set up OpenTelemetry: %s.", err) + } + + return shutdown +} + // checkFlags checks that CLI flags are not self-contradictory. func checkFlags(logger *zap.Logger) { l := logger.Sugar() @@ -381,7 +404,15 @@ func run() { logger := setupLogger(stateProvider, cli.Log.Format) - // todo setup otel + otelShutdown := setupTracer() + defer func() { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + if err := otelShutdown(ctx); err != nil { + logger.Sugar().Errorf("Failed to shutdown OpenTelemetry: %s.", err) + } + }() checkFlags(logger) diff --git a/internal/util/observability/observability.go b/internal/util/observability/observability.go index c1ff58cde6d6..acb40c6d8119 100644 --- a/internal/util/observability/observability.go +++ b/internal/util/observability/observability.go @@ -26,17 +26,27 @@ import ( otelsdkresource "go.opentelemetry.io/otel/sdk/resource" otelsdktrace "go.opentelemetry.io/otel/sdk/trace" otelsemconv "go.opentelemetry.io/otel/semconv/v1.21.0" - - "github.com/FerretDB/FerretDB/internal/util/must" ) -// SetupOtel sets up OpenTelemetry exporter with fixed values. -func SetupOtel(service string) func(context.Context) error { - exporter := must.NotFail(otlptracehttp.New( +// ShutdownFunc is a function that shuts down the OpenTelemetry observability system. +type ShutdownFunc func(context.Context) error + +// SetupOtel sets up OTLP exporter and tracer provider. +// +// If endpoint is empty, no exporter is set up. +func SetupOtel(service string, endpoint string) (ShutdownFunc, error) { + if endpoint == "" { + return nil, nil + } + + exporter, err := otlptracehttp.New( context.TODO(), - otlptracehttp.WithEndpoint("127.0.0.1:4318"), + otlptracehttp.WithEndpoint(endpoint), otlptracehttp.WithInsecure(), - )) + ) + if err != nil { + return nil, err + } tp := otelsdktrace.NewTracerProvider( otelsdktrace.WithBatcher(exporter, otelsdktrace.WithBatchTimeout(time.Second)), @@ -48,5 +58,5 @@ func SetupOtel(service string) func(context.Context) error { otel.SetTracerProvider(tp) - return tp.Shutdown + return tp.Shutdown, nil } From 3ab2770431dabf2f2c3cf4a3797797a86eba2475 Mon Sep 17 00:00:00 2001 From: Elena Grahovac Date: Mon, 24 Jun 2024 15:20:26 +0200 Subject: [PATCH 03/30] wip --- internal/util/observability/observability.go | 42 +++++++++++++++----- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/internal/util/observability/observability.go b/internal/util/observability/observability.go index acb40c6d8119..249d21d3bc3a 100644 --- a/internal/util/observability/observability.go +++ b/internal/util/observability/observability.go @@ -19,6 +19,8 @@ package observability import ( "context" + "errors" + "strconv" "time" "go.opentelemetry.io/otel" @@ -28,31 +30,51 @@ import ( otelsemconv "go.opentelemetry.io/otel/semconv/v1.21.0" ) +// Config is the configuration for OpenTelemetry. +type Config struct { + Service string + Endpoint string + TracesSampler string + TracesSamplerArg string +} + // ShutdownFunc is a function that shuts down the OpenTelemetry observability system. type ShutdownFunc func(context.Context) error // SetupOtel sets up OTLP exporter and tracer provider. -// -// If endpoint is empty, no exporter is set up. -func SetupOtel(service string, endpoint string) (ShutdownFunc, error) { - if endpoint == "" { - return nil, nil - } - +func SetupOtel(config Config) (ShutdownFunc, error) { exporter, err := otlptracehttp.New( context.TODO(), - otlptracehttp.WithEndpoint(endpoint), + otlptracehttp.WithEndpoint(config.Endpoint), otlptracehttp.WithInsecure(), ) if err != nil { return nil, err } + var sampler otelsdktrace.Sampler + switch config.TracesSampler { + case "always_on": + sampler = otelsdktrace.AlwaysSample() + case "always_off": + sampler = otelsdktrace.NeverSample() + case "traceidratio": + ratio, err := strconv.ParseFloat(config.TracesSamplerArg, 64) + if err != nil { + return nil, errors.New("unsupported trace ID ratio: " + config.TracesSamplerArg) + } + + sampler = otelsdktrace.TraceIDRatioBased(ratio) + + default: + return nil, errors.New("unsupported sampler") + } + tp := otelsdktrace.NewTracerProvider( otelsdktrace.WithBatcher(exporter, otelsdktrace.WithBatchTimeout(time.Second)), - otelsdktrace.WithSampler(otelsdktrace.AlwaysSample()), + otelsdktrace.WithSampler(sampler), otelsdktrace.WithResource(otelsdkresource.NewSchemaless( - otelsemconv.ServiceNameKey.String(service), + otelsemconv.ServiceNameKey.String(config.Service), )), ) From 1ab6d4e92d031c18ae3f9066ce62b1eaa4b3bd5d Mon Sep 17 00:00:00 2001 From: Elena Grahovac Date: Mon, 24 Jun 2024 23:59:04 +0200 Subject: [PATCH 04/30] wip --- cmd/envtool/tests.go | 10 +++++- cmd/ferretdb/main.go | 38 ++++++++++++-------- integration/setup/startup.go | 7 +++- internal/util/observability/observability.go | 9 +++-- 4 files changed, 46 insertions(+), 18 deletions(-) diff --git a/cmd/envtool/tests.go b/cmd/envtool/tests.go index a6fcfdf289e1..9bb94fdeee69 100644 --- a/cmd/envtool/tests.go +++ b/cmd/envtool/tests.go @@ -91,7 +91,15 @@ func resultKey(packageName, testName string) string { // runGoTest runs `go test` with given extra args. func runGoTest(ctx context.Context, args []string, total int, times bool, logger *zap.SugaredLogger) error { - shutdownOtel := observability.SetupOtel("envtool tests") + shutdownOtel, err := observability.SetupOtel(observability.Config{ + Service: "envtool tests", + Endpoint: "http://localhost:4317", + TracesSampler: "always_on", + BSPDelay: 5 * time.Second, + }) + if err != nil { + return lazyerrors.Error(err) + } defer func() { shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 5*time.Second) diff --git a/cmd/ferretdb/main.go b/cmd/ferretdb/main.go index 5fa97ea7f347..2fcb0c1aa493 100644 --- a/cmd/ferretdb/main.go +++ b/cmd/ferretdb/main.go @@ -26,8 +26,6 @@ import ( "sync" "time" - "github.com/FerretDB/FerretDB/internal/util/observability" - "github.com/alecthomas/kong" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/expfmt" @@ -45,6 +43,7 @@ import ( "github.com/FerretDB/FerretDB/internal/util/debugbuild" "github.com/FerretDB/FerretDB/internal/util/logging" "github.com/FerretDB/FerretDB/internal/util/must" + "github.com/FerretDB/FerretDB/internal/util/observability" "github.com/FerretDB/FerretDB/internal/util/password" "github.com/FerretDB/FerretDB/internal/util/state" "github.com/FerretDB/FerretDB/internal/util/telemetry" @@ -129,10 +128,11 @@ var cli struct { } `embed:"" prefix:"test-"` OTEL struct { - Enable bool `default:"false" help:"Enable OpenTelemetry."` - OTLPEndpoint string `default:"127.0.0.1:4318" help:"OTLP exporter endpoint."` - TracesSampler string `default:"always_on" help:"Traces sampler settings, see https://opentelemetry.io/docs/languages/sdk-configuration/general/#otel_traces_sampler." enum:"always_on,always_off,traceidratio"` - TracesSamplerArg string `default:"" help:"Traces sampler argument for traceidratio strategy."` + Enable bool `default:"false" help:"Enable OpenTelemetry."` + OTLPEndpoint string `default:"127.0.0.1:4318" help:"OTLP exporter endpoint."` + TracesSampler string `default:"always_on" help:"Traces sampler settings." enum:"always_on,always_off,traceidratio"` //nolint:lll // for readability + TracesSamplerArg string `default:"" help:"Traces sampler argument for traceidratio strategy."` + BSPScheduleDelay time.Duration `default:"5s" help:"BatchSpanProcessor maximum delay."` } `embed:"" prefix:"otel-"` } @@ -324,7 +324,14 @@ func setupTracer() observability.ShutdownFunc { return nil } - shutdown, err := observability.SetupOtel("ferretdb", cli.OTEL.OTLPEndpoint) + shutdown, err := observability.SetupOtel(observability.Config{ + Service: "ferretdb", + Version: version.Get().Version, + Endpoint: cli.OTEL.OTLPEndpoint, + TracesSampler: cli.OTEL.TracesSampler, + TracesSamplerArg: cli.OTEL.TracesSamplerArg, + BSPDelay: cli.OTEL.BSPScheduleDelay, + }) if err != nil { log.Fatalf("Failed to set up OpenTelemetry: %s.", err) } @@ -405,14 +412,17 @@ func run() { logger := setupLogger(stateProvider, cli.Log.Format) otelShutdown := setupTracer() - defer func() { - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - if err := otelShutdown(ctx); err != nil { - logger.Sugar().Errorf("Failed to shutdown OpenTelemetry: %s.", err) - } - }() + if otelShutdown != nil { + defer func() { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + if err := otelShutdown(ctx); err != nil { + logger.Sugar().Errorf("Failed to shutdown OpenTelemetry: %s.", err) + } + }() + } checkFlags(logger) diff --git a/integration/setup/startup.go b/integration/setup/startup.go index ac511d2b335b..21c81b45a33b 100644 --- a/integration/setup/startup.go +++ b/integration/setup/startup.go @@ -104,7 +104,12 @@ func Startup() { zap.S().Infof("Compat system: none, compatibility tests will be skipped.") } - shutdownOtel = observability.SetupOtel("integration") + shutdownOtel = must.NotFail(observability.SetupOtel(observability.Config{ + Service: "integration-tests", + Endpoint: "http://localhost:4317", + TracesSampler: "always_on", + BSPDelay: 5 * time.Second, + })) } // Shutdown cleans up after all tests. diff --git a/internal/util/observability/observability.go b/internal/util/observability/observability.go index 249d21d3bc3a..1666c379e493 100644 --- a/internal/util/observability/observability.go +++ b/internal/util/observability/observability.go @@ -33,15 +33,19 @@ import ( // Config is the configuration for OpenTelemetry. type Config struct { Service string + Version string Endpoint string TracesSampler string TracesSamplerArg string + BSPDelay time.Duration } // ShutdownFunc is a function that shuts down the OpenTelemetry observability system. type ShutdownFunc func(context.Context) error // SetupOtel sets up OTLP exporter and tracer provider. +// +// The function returns a shutdown function that should be called when the application is shutting down. func SetupOtel(config Config) (ShutdownFunc, error) { exporter, err := otlptracehttp.New( context.TODO(), @@ -71,10 +75,11 @@ func SetupOtel(config Config) (ShutdownFunc, error) { } tp := otelsdktrace.NewTracerProvider( - otelsdktrace.WithBatcher(exporter, otelsdktrace.WithBatchTimeout(time.Second)), + otelsdktrace.WithBatcher(exporter, otelsdktrace.WithBatchTimeout(config.BSPDelay)), otelsdktrace.WithSampler(sampler), otelsdktrace.WithResource(otelsdkresource.NewSchemaless( - otelsemconv.ServiceNameKey.String(config.Service), + otelsemconv.ServiceNamespace(config.Service), + otelsemconv.ServiceVersion(config.Version), )), ) From abcbc2553443d9d190022876c766d56bdc935fb8 Mon Sep 17 00:00:00 2001 From: Elena Grahovac Date: Tue, 25 Jun 2024 08:05:44 +0200 Subject: [PATCH 05/30] removed experimentations --- cmd/envtool/tests.go | 2 +- integration/setup/startup.go | 2 +- internal/util/observability/observability.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd/envtool/tests.go b/cmd/envtool/tests.go index 9bb94fdeee69..7bbe6688d537 100644 --- a/cmd/envtool/tests.go +++ b/cmd/envtool/tests.go @@ -93,7 +93,7 @@ func resultKey(packageName, testName string) string { func runGoTest(ctx context.Context, args []string, total int, times bool, logger *zap.SugaredLogger) error { shutdownOtel, err := observability.SetupOtel(observability.Config{ Service: "envtool tests", - Endpoint: "http://localhost:4317", + Endpoint: "127.0.0.1:4318", TracesSampler: "always_on", BSPDelay: 5 * time.Second, }) diff --git a/integration/setup/startup.go b/integration/setup/startup.go index 21c81b45a33b..d019edf68975 100644 --- a/integration/setup/startup.go +++ b/integration/setup/startup.go @@ -106,7 +106,7 @@ func Startup() { shutdownOtel = must.NotFail(observability.SetupOtel(observability.Config{ Service: "integration-tests", - Endpoint: "http://localhost:4317", + Endpoint: "127.0.0.1:4318", TracesSampler: "always_on", BSPDelay: 5 * time.Second, })) diff --git a/internal/util/observability/observability.go b/internal/util/observability/observability.go index 1666c379e493..3147fec31989 100644 --- a/internal/util/observability/observability.go +++ b/internal/util/observability/observability.go @@ -78,7 +78,7 @@ func SetupOtel(config Config) (ShutdownFunc, error) { otelsdktrace.WithBatcher(exporter, otelsdktrace.WithBatchTimeout(config.BSPDelay)), otelsdktrace.WithSampler(sampler), otelsdktrace.WithResource(otelsdkresource.NewSchemaless( - otelsemconv.ServiceNamespace(config.Service), + otelsemconv.ServiceName(config.Service), otelsemconv.ServiceVersion(config.Version), )), ) From c6a280c33f1ab45aaeea930b4542b6a6fc23f789 Mon Sep 17 00:00:00 2001 From: Elena Grahovac Date: Tue, 25 Jun 2024 08:48:11 +0200 Subject: [PATCH 06/30] lint --- cmd/ferretdb/main.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cmd/ferretdb/main.go b/cmd/ferretdb/main.go index 2fcb0c1aa493..8d809df6289d 100644 --- a/cmd/ferretdb/main.go +++ b/cmd/ferretdb/main.go @@ -128,11 +128,11 @@ var cli struct { } `embed:"" prefix:"test-"` OTEL struct { - Enable bool `default:"false" help:"Enable OpenTelemetry."` + Enable bool `default:"false" help:"Enable OpenTelemetry."` OTLPEndpoint string `default:"127.0.0.1:4318" help:"OTLP exporter endpoint."` - TracesSampler string `default:"always_on" help:"Traces sampler settings." enum:"always_on,always_off,traceidratio"` //nolint:lll // for readability - TracesSamplerArg string `default:"" help:"Traces sampler argument for traceidratio strategy."` - BSPScheduleDelay time.Duration `default:"5s" help:"BatchSpanProcessor maximum delay."` + TracesSampler string `default:"always_on" help:"Traces sampler settings." enum:"always_on,always_off,traceidratio"` //nolint:lll // for readability + TracesSamplerArg string `default:"" help:"Traces sampler argument for traceidratio strategy."` + BSPScheduleDelay time.Duration `default:"5s" help:"BatchSpanProcessor maximum delay."` } `embed:"" prefix:"otel-"` } From 4260134a81a580b526e50c07ab02c31a93dd3396 Mon Sep 17 00:00:00 2001 From: Elena Grahovac Date: Tue, 25 Jun 2024 08:53:16 +0200 Subject: [PATCH 07/30] lint --- cmd/envtool/tests.go | 2 +- cmd/ferretdb/main.go | 4 ++-- internal/util/observability/observability.go | 15 ++++++++++----- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/cmd/envtool/tests.go b/cmd/envtool/tests.go index 7bbe6688d537..0708015f69fd 100644 --- a/cmd/envtool/tests.go +++ b/cmd/envtool/tests.go @@ -95,7 +95,7 @@ func runGoTest(ctx context.Context, args []string, total int, times bool, logger Service: "envtool tests", Endpoint: "127.0.0.1:4318", TracesSampler: "always_on", - BSPDelay: 5 * time.Second, + BSPDelay: 5 * time.Second, //nolint:mnd // default timeout is enough }) if err != nil { return lazyerrors.Error(err) diff --git a/cmd/ferretdb/main.go b/cmd/ferretdb/main.go index 8d809df6289d..caa9c6b35be5 100644 --- a/cmd/ferretdb/main.go +++ b/cmd/ferretdb/main.go @@ -128,11 +128,11 @@ var cli struct { } `embed:"" prefix:"test-"` OTEL struct { - Enable bool `default:"false" help:"Enable OpenTelemetry."` OTLPEndpoint string `default:"127.0.0.1:4318" help:"OTLP exporter endpoint."` TracesSampler string `default:"always_on" help:"Traces sampler settings." enum:"always_on,always_off,traceidratio"` //nolint:lll // for readability TracesSamplerArg string `default:"" help:"Traces sampler argument for traceidratio strategy."` BSPScheduleDelay time.Duration `default:"5s" help:"BatchSpanProcessor maximum delay."` + Enable bool `default:"false" help:"Enable OpenTelemetry."` } `embed:"" prefix:"otel-"` } @@ -415,7 +415,7 @@ func run() { if otelShutdown != nil { defer func() { - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) //nolint:mnd // default timeout is enough defer cancel() if err := otelShutdown(ctx); err != nil { diff --git a/internal/util/observability/observability.go b/internal/util/observability/observability.go index 3147fec31989..2a571c2f19cf 100644 --- a/internal/util/observability/observability.go +++ b/internal/util/observability/observability.go @@ -24,6 +24,7 @@ import ( "time" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace" "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp" otelsdkresource "go.opentelemetry.io/otel/sdk/resource" otelsdktrace "go.opentelemetry.io/otel/sdk/trace" @@ -47,24 +48,28 @@ type ShutdownFunc func(context.Context) error // // The function returns a shutdown function that should be called when the application is shutting down. func SetupOtel(config Config) (ShutdownFunc, error) { - exporter, err := otlptracehttp.New( + var err error + var exporter *otlptrace.Exporter + + if exporter, err = otlptracehttp.New( context.TODO(), otlptracehttp.WithEndpoint(config.Endpoint), otlptracehttp.WithInsecure(), - ) - if err != nil { + ); err != nil { return nil, err } var sampler otelsdktrace.Sampler + switch config.TracesSampler { case "always_on": sampler = otelsdktrace.AlwaysSample() case "always_off": sampler = otelsdktrace.NeverSample() case "traceidratio": - ratio, err := strconv.ParseFloat(config.TracesSamplerArg, 64) - if err != nil { + var ratio float64 + + if ratio, err = strconv.ParseFloat(config.TracesSamplerArg, 64); err != nil { return nil, errors.New("unsupported trace ID ratio: " + config.TracesSamplerArg) } From 76de66d65fd7704373d4076e220e386051448be9 Mon Sep 17 00:00:00 2001 From: Elena Grahovac Date: Tue, 25 Jun 2024 08:57:00 +0200 Subject: [PATCH 08/30] go mod --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index e00a24511c0d..d1abde4a785e 100644 --- a/go.mod +++ b/go.mod @@ -25,6 +25,7 @@ require ( github.com/xdg-go/stringprep v1.0.4 go.mongodb.org/mongo-driver v1.15.1 go.opentelemetry.io/otel v1.27.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.27.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.27.0 go.opentelemetry.io/otel/sdk v1.27.0 go.opentelemetry.io/otel/trace v1.27.0 @@ -61,7 +62,6 @@ require ( github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/xdg-go/pbkdf2 v1.0.0 // indirect github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d // indirect - go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.27.0 // indirect go.opentelemetry.io/otel/metric v1.27.0 // indirect go.opentelemetry.io/proto/otlp v1.2.0 // indirect go.uber.org/multierr v1.10.0 // indirect From 1cf86adb6e931335ee940830a89b7a67dc4de158 Mon Sep 17 00:00:00 2001 From: Elena Grahovac Date: Tue, 25 Jun 2024 09:04:29 +0200 Subject: [PATCH 09/30] lint --- integration/setup/startup.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration/setup/startup.go b/integration/setup/startup.go index d019edf68975..225b88aaee7b 100644 --- a/integration/setup/startup.go +++ b/integration/setup/startup.go @@ -108,7 +108,7 @@ func Startup() { Service: "integration-tests", Endpoint: "127.0.0.1:4318", TracesSampler: "always_on", - BSPDelay: 5 * time.Second, + BSPDelay: 5 * time.Second, //nolint:mnd // default timeout is enough })) } From 0060035fe30b033b49f01c87731c96d988e8655b Mon Sep 17 00:00:00 2001 From: Elena Grahovac Date: Tue, 25 Jun 2024 10:22:46 +0200 Subject: [PATCH 10/30] wip --- cmd/envtool/tests.go | 6 +-- cmd/ferretdb/main.go | 21 +++------ integration/setup/startup.go | 6 +-- internal/util/observability/observability.go | 47 +++++--------------- 4 files changed, 21 insertions(+), 59 deletions(-) diff --git a/cmd/envtool/tests.go b/cmd/envtool/tests.go index 0708015f69fd..b6ad511f3087 100644 --- a/cmd/envtool/tests.go +++ b/cmd/envtool/tests.go @@ -92,10 +92,8 @@ func resultKey(packageName, testName string) string { // runGoTest runs `go test` with given extra args. func runGoTest(ctx context.Context, args []string, total int, times bool, logger *zap.SugaredLogger) error { shutdownOtel, err := observability.SetupOtel(observability.Config{ - Service: "envtool tests", - Endpoint: "127.0.0.1:4318", - TracesSampler: "always_on", - BSPDelay: 5 * time.Second, //nolint:mnd // default timeout is enough + Service: "envtool tests", + Endpoint: "127.0.0.1:4318", }) if err != nil { return lazyerrors.Error(err) diff --git a/cmd/ferretdb/main.go b/cmd/ferretdb/main.go index caa9c6b35be5..ec41736d6bfa 100644 --- a/cmd/ferretdb/main.go +++ b/cmd/ferretdb/main.go @@ -125,15 +125,9 @@ var cli struct { ReportTimeout time.Duration `default:"5s" help:"Telemetry: report timeout."` Package string `default:"" help:"Telemetry: custom package type."` } `embed:"" prefix:"telemetry-"` - } `embed:"" prefix:"test-"` - OTEL struct { - OTLPEndpoint string `default:"127.0.0.1:4318" help:"OTLP exporter endpoint."` - TracesSampler string `default:"always_on" help:"Traces sampler settings." enum:"always_on,always_off,traceidratio"` //nolint:lll // for readability - TracesSamplerArg string `default:"" help:"Traces sampler argument for traceidratio strategy."` - BSPScheduleDelay time.Duration `default:"5s" help:"BatchSpanProcessor maximum delay."` - Enable bool `default:"false" help:"Enable OpenTelemetry."` - } `embed:"" prefix:"otel-"` + OTLPEndpoint string `default:"" help:"OTLP exporter endpoint, if unset, OpenTelemetry is disabled."` + } `embed:"" prefix:"test-"` } // The postgreSQLFlags struct represents flags that are used by the "postgresql" backend. @@ -320,17 +314,14 @@ func setupLogger(stateProvider *state.Provider, format string) *zap.Logger { // setupTracer sets up OpenTelemetry exporter. func setupTracer() observability.ShutdownFunc { - if !cli.OTEL.Enable { + if cli.Test.OTLPEndpoint == "" { return nil } shutdown, err := observability.SetupOtel(observability.Config{ - Service: "ferretdb", - Version: version.Get().Version, - Endpoint: cli.OTEL.OTLPEndpoint, - TracesSampler: cli.OTEL.TracesSampler, - TracesSamplerArg: cli.OTEL.TracesSamplerArg, - BSPDelay: cli.OTEL.BSPScheduleDelay, + Service: "ferretdb", + Version: version.Get().Version, + Endpoint: cli.Test.OTLPEndpoint, }) if err != nil { log.Fatalf("Failed to set up OpenTelemetry: %s.", err) diff --git a/integration/setup/startup.go b/integration/setup/startup.go index 225b88aaee7b..b35c95721322 100644 --- a/integration/setup/startup.go +++ b/integration/setup/startup.go @@ -105,10 +105,8 @@ func Startup() { } shutdownOtel = must.NotFail(observability.SetupOtel(observability.Config{ - Service: "integration-tests", - Endpoint: "127.0.0.1:4318", - TracesSampler: "always_on", - BSPDelay: 5 * time.Second, //nolint:mnd // default timeout is enough + Service: "integration-tests", + Endpoint: "127.0.0.1:4318", })) } diff --git a/internal/util/observability/observability.go b/internal/util/observability/observability.go index 2a571c2f19cf..78fef4ce5da7 100644 --- a/internal/util/observability/observability.go +++ b/internal/util/observability/observability.go @@ -19,12 +19,9 @@ package observability import ( "context" - "errors" - "strconv" "time" "go.opentelemetry.io/otel" - "go.opentelemetry.io/otel/exporters/otlp/otlptrace" "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp" otelsdkresource "go.opentelemetry.io/otel/sdk/resource" otelsdktrace "go.opentelemetry.io/otel/sdk/trace" @@ -33,12 +30,9 @@ import ( // Config is the configuration for OpenTelemetry. type Config struct { - Service string - Version string - Endpoint string - TracesSampler string - TracesSamplerArg string - BSPDelay time.Duration + Service string + Version string + Endpoint string } // ShutdownFunc is a function that shuts down the OpenTelemetry observability system. @@ -48,40 +42,21 @@ type ShutdownFunc func(context.Context) error // // The function returns a shutdown function that should be called when the application is shutting down. func SetupOtel(config Config) (ShutdownFunc, error) { - var err error - var exporter *otlptrace.Exporter - - if exporter, err = otlptracehttp.New( + exporter, err := otlptracehttp.New( context.TODO(), otlptracehttp.WithEndpoint(config.Endpoint), otlptracehttp.WithInsecure(), - ); err != nil { + ) + if err != nil { return nil, err } - var sampler otelsdktrace.Sampler - - switch config.TracesSampler { - case "always_on": - sampler = otelsdktrace.AlwaysSample() - case "always_off": - sampler = otelsdktrace.NeverSample() - case "traceidratio": - var ratio float64 - - if ratio, err = strconv.ParseFloat(config.TracesSamplerArg, 64); err != nil { - return nil, errors.New("unsupported trace ID ratio: " + config.TracesSamplerArg) - } - - sampler = otelsdktrace.TraceIDRatioBased(ratio) - - default: - return nil, errors.New("unsupported sampler") - } - + // Tracer is configured with the particular params on purpose. + // We don't want to let them being set through OTEL_* environment variables, + // but we set them explicitly. tp := otelsdktrace.NewTracerProvider( - otelsdktrace.WithBatcher(exporter, otelsdktrace.WithBatchTimeout(config.BSPDelay)), - otelsdktrace.WithSampler(sampler), + otelsdktrace.WithBatcher(exporter, otelsdktrace.WithBatchTimeout(time.Second)), + otelsdktrace.WithSampler(otelsdktrace.AlwaysSample()), otelsdktrace.WithResource(otelsdkresource.NewSchemaless( otelsemconv.ServiceName(config.Service), otelsemconv.ServiceVersion(config.Version), From 2522bb31639a9b7510ee1eb0e0f9dabadd149f94 Mon Sep 17 00:00:00 2001 From: Elena Grahovac Date: Tue, 25 Jun 2024 10:27:04 +0200 Subject: [PATCH 11/30] wip --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index d1abde4a785e..e00a24511c0d 100644 --- a/go.mod +++ b/go.mod @@ -25,7 +25,6 @@ require ( github.com/xdg-go/stringprep v1.0.4 go.mongodb.org/mongo-driver v1.15.1 go.opentelemetry.io/otel v1.27.0 - go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.27.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.27.0 go.opentelemetry.io/otel/sdk v1.27.0 go.opentelemetry.io/otel/trace v1.27.0 @@ -62,6 +61,7 @@ require ( github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/xdg-go/pbkdf2 v1.0.0 // indirect github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.27.0 // indirect go.opentelemetry.io/otel/metric v1.27.0 // indirect go.opentelemetry.io/proto/otlp v1.2.0 // indirect go.uber.org/multierr v1.10.0 // indirect From bc6a5edf6ab56e4bc24f1f1d864cd4355f105546 Mon Sep 17 00:00:00 2001 From: Elena Grahovac Date: Tue, 25 Jun 2024 10:28:48 +0200 Subject: [PATCH 12/30] wip --- cmd/ferretdb/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/ferretdb/main.go b/cmd/ferretdb/main.go index ec41736d6bfa..390498144072 100644 --- a/cmd/ferretdb/main.go +++ b/cmd/ferretdb/main.go @@ -406,7 +406,7 @@ func run() { if otelShutdown != nil { defer func() { - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) //nolint:mnd // default timeout is enough + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) //nolint:mnd // simple shutdown timeout defer cancel() if err := otelShutdown(ctx); err != nil { From 1cabea7a12a14564f14da3b7b30383eb1e08e169 Mon Sep 17 00:00:00 2001 From: Elena Grahovac Date: Wed, 26 Jun 2024 22:40:46 +0200 Subject: [PATCH 13/30] wip --- cmd/envtool/tests.go | 2 +- cmd/ferretdb/main.go | 41 +++++++++++--------- internal/util/observability/observability.go | 6 +-- 3 files changed, 27 insertions(+), 22 deletions(-) diff --git a/cmd/envtool/tests.go b/cmd/envtool/tests.go index b6ad511f3087..baf3e3497f37 100644 --- a/cmd/envtool/tests.go +++ b/cmd/envtool/tests.go @@ -92,7 +92,7 @@ func resultKey(packageName, testName string) string { // runGoTest runs `go test` with given extra args. func runGoTest(ctx context.Context, args []string, total int, times bool, logger *zap.SugaredLogger) error { shutdownOtel, err := observability.SetupOtel(observability.Config{ - Service: "envtool tests", + Service: "envtool-tests", Endpoint: "127.0.0.1:4318", }) if err != nil { diff --git a/cmd/ferretdb/main.go b/cmd/ferretdb/main.go index 390498144072..d660c1985f83 100644 --- a/cmd/ferretdb/main.go +++ b/cmd/ferretdb/main.go @@ -118,6 +118,8 @@ var cli struct { BatchSize int `default:"100" help:"Experimental: maximum insertion batch size."` MaxBsonObjectSizeMiB int `default:"16" help:"Experimental: maximum BSON object size in MiB."` + OTLPEndpoint string `default:"" help:"Experimental: OTLP exporter endpoint, if unset, OpenTelemetry is disabled."` + Telemetry struct { URL string `default:"https://beacon.ferretdb.com/" help:"Telemetry: reporting URL."` UndecidedDelay time.Duration `default:"1h" help:"Telemetry: delay for undecided state."` @@ -125,8 +127,6 @@ var cli struct { ReportTimeout time.Duration `default:"5s" help:"Telemetry: report timeout."` Package string `default:"" help:"Telemetry: custom package type."` } `embed:"" prefix:"telemetry-"` - - OTLPEndpoint string `default:"" help:"OTLP exporter endpoint, if unset, OpenTelemetry is disabled."` } `embed:"" prefix:"test-"` } @@ -312,21 +312,25 @@ func setupLogger(stateProvider *state.Provider, format string) *zap.Logger { return l } -// setupTracer sets up OpenTelemetry exporter. -func setupTracer() observability.ShutdownFunc { +// setupTracer sets up OpenTelemetry tracer. +func setupTracer(logger *zap.Logger) observability.ShutdownFunc { if cli.Test.OTLPEndpoint == "" { return nil } + l := logger.Sugar() + shutdown, err := observability.SetupOtel(observability.Config{ Service: "ferretdb", Version: version.Get().Version, Endpoint: cli.Test.OTLPEndpoint, }) if err != nil { - log.Fatalf("Failed to set up OpenTelemetry: %s.", err) + l.Fatalf("Failed to set up OpenTelemetry: %s.", err) } + l.Infof("OpenTelemetry is enabled. OTLP exporter endpoint: %s.", cli.Test.OTLPEndpoint) + return shutdown } @@ -402,19 +406,6 @@ func run() { logger := setupLogger(stateProvider, cli.Log.Format) - otelShutdown := setupTracer() - - if otelShutdown != nil { - defer func() { - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) //nolint:mnd // simple shutdown timeout - defer cancel() - - if err := otelShutdown(ctx); err != nil { - logger.Sugar().Errorf("Failed to shutdown OpenTelemetry: %s.", err) - } - }() - } - checkFlags(logger) if _, err := maxprocs.Set(maxprocs.Logger(logger.Sugar().Debugf)); err != nil { @@ -429,6 +420,20 @@ func run() { stop() }() + otelShutdown := setupTracer(logger) + + if otelShutdown != nil { + defer func() { + shutdownCtx, shutdownCancel := ctxutil.WithDelay(ctx.Done(), 3*time.Second) //nolint:mnd // simple shutdown timeout + + if err := otelShutdown(shutdownCtx); err != nil { + logger.Sugar().Errorf("Failed to shutdown OpenTelemetry: %s.", err) + } + + shutdownCancel() + }() + } + var wg sync.WaitGroup if cli.DebugAddr != "" && cli.DebugAddr != "-" { diff --git a/internal/util/observability/observability.go b/internal/util/observability/observability.go index 78fef4ce5da7..cea4a7e74ec3 100644 --- a/internal/util/observability/observability.go +++ b/internal/util/observability/observability.go @@ -42,6 +42,9 @@ type ShutdownFunc func(context.Context) error // // The function returns a shutdown function that should be called when the application is shutting down. func SetupOtel(config Config) (ShutdownFunc, error) { + // Exporter and tracer are configured with the particular params on purpose. + // We don't want to let them being set through OTEL_* environment variables, + // but we set them explicitly. exporter, err := otlptracehttp.New( context.TODO(), otlptracehttp.WithEndpoint(config.Endpoint), @@ -51,9 +54,6 @@ func SetupOtel(config Config) (ShutdownFunc, error) { return nil, err } - // Tracer is configured with the particular params on purpose. - // We don't want to let them being set through OTEL_* environment variables, - // but we set them explicitly. tp := otelsdktrace.NewTracerProvider( otelsdktrace.WithBatcher(exporter, otelsdktrace.WithBatchTimeout(time.Second)), otelsdktrace.WithSampler(otelsdktrace.AlwaysSample()), From 4df096f0f6fcb644054667405751bea0c09f5177 Mon Sep 17 00:00:00 2001 From: Elena Grahovac Date: Thu, 27 Jun 2024 13:11:30 +0200 Subject: [PATCH 14/30] wip --- cmd/ferretdb/main.go | 16 +++++++++++ internal/util/observability/observability.go | 29 +++++++++++++------- 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/cmd/ferretdb/main.go b/cmd/ferretdb/main.go index d660c1985f83..c645ad1b8de9 100644 --- a/cmd/ferretdb/main.go +++ b/cmd/ferretdb/main.go @@ -445,6 +445,22 @@ func run() { }() } + if cli.Test.OTLPEndpoint != "" { + wg.Add(1) + + go func() { + defer wg.Done() + + otelConf := observability.OtelConfig{ + Service: "ferretdb", + Version: version.Get().Version, + Endpoint: cli.Test.OTLPEndpoint, + } + + observability.RunOtel(ctx, otelConf, logger.Named("otel")) + }() + } + metrics := connmetrics.NewListenerMetrics() wg.Add(1) diff --git a/internal/util/observability/observability.go b/internal/util/observability/observability.go index cea4a7e74ec3..81faf417f5dc 100644 --- a/internal/util/observability/observability.go +++ b/internal/util/observability/observability.go @@ -26,22 +26,20 @@ import ( otelsdkresource "go.opentelemetry.io/otel/sdk/resource" otelsdktrace "go.opentelemetry.io/otel/sdk/trace" otelsemconv "go.opentelemetry.io/otel/semconv/v1.21.0" + zap "go.uber.org/zap" ) -// Config is the configuration for OpenTelemetry. -type Config struct { +// OtelConfig is the configuration for OpenTelemetry. +type OtelConfig struct { Service string Version string Endpoint string } -// ShutdownFunc is a function that shuts down the OpenTelemetry observability system. -type ShutdownFunc func(context.Context) error +// RunOtel runs the OpenTelemetry system with the given configuration. +func RunOtel(ctx context.Context, config OtelConfig, l *zap.Logger) { + logger := l.Sugar() -// SetupOtel sets up OTLP exporter and tracer provider. -// -// The function returns a shutdown function that should be called when the application is shutting down. -func SetupOtel(config Config) (ShutdownFunc, error) { // Exporter and tracer are configured with the particular params on purpose. // We don't want to let them being set through OTEL_* environment variables, // but we set them explicitly. @@ -51,7 +49,8 @@ func SetupOtel(config Config) (ShutdownFunc, error) { otlptracehttp.WithInsecure(), ) if err != nil { - return nil, err + logger.Errorf("Failed to create OTLP exporter: %s. OpenTelemetry won't be used.", err) + return } tp := otelsdktrace.NewTracerProvider( @@ -65,5 +64,15 @@ func SetupOtel(config Config) (ShutdownFunc, error) { otel.SetTracerProvider(tp) - return tp.Shutdown, nil + <-ctx.Done() + + stopCtx, stopCancel := context.WithTimeout(context.Background(), 3*time.Second) + defer stopCancel() + + if err := tp.Shutdown(stopCtx); err != nil { + logger.Errorf("Error while shutdown OpenTelemetry system: %v", err) + return + } + + logger.Info("OpenTelemetry system stopped successfully.") } From 10ee683408910e3cd0c534a8025331c85b7eb102 Mon Sep 17 00:00:00 2001 From: Elena Grahovac Date: Thu, 27 Jun 2024 15:38:59 +0200 Subject: [PATCH 15/30] wip --- cmd/envtool/tests.go | 14 +----- cmd/ferretdb/main.go | 51 +++----------------- integration/setup/startup.go | 27 ++++------- internal/util/observability/observability.go | 14 +++--- 4 files changed, 27 insertions(+), 79 deletions(-) diff --git a/cmd/envtool/tests.go b/cmd/envtool/tests.go index baf3e3497f37..a6a6bb313779 100644 --- a/cmd/envtool/tests.go +++ b/cmd/envtool/tests.go @@ -91,22 +91,12 @@ func resultKey(packageName, testName string) string { // runGoTest runs `go test` with given extra args. func runGoTest(ctx context.Context, args []string, total int, times bool, logger *zap.SugaredLogger) error { - shutdownOtel, err := observability.SetupOtel(observability.Config{ + otelConf := observability.OtelConfig{ Service: "envtool-tests", Endpoint: "127.0.0.1:4318", - }) - if err != nil { - return lazyerrors.Error(err) } - defer func() { - shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 5*time.Second) - defer shutdownCancel() - - if err := shutdownOtel(shutdownCtx); err != nil { - logger.Error(err) - } - }() + go observability.RunOtel(ctx, otelConf, logger) cmd := exec.CommandContext(ctx, "go", append([]string{"test", "-json"}, args...)...) diff --git a/cmd/ferretdb/main.go b/cmd/ferretdb/main.go index c645ad1b8de9..7fbff65e6891 100644 --- a/cmd/ferretdb/main.go +++ b/cmd/ferretdb/main.go @@ -312,28 +312,6 @@ func setupLogger(stateProvider *state.Provider, format string) *zap.Logger { return l } -// setupTracer sets up OpenTelemetry tracer. -func setupTracer(logger *zap.Logger) observability.ShutdownFunc { - if cli.Test.OTLPEndpoint == "" { - return nil - } - - l := logger.Sugar() - - shutdown, err := observability.SetupOtel(observability.Config{ - Service: "ferretdb", - Version: version.Get().Version, - Endpoint: cli.Test.OTLPEndpoint, - }) - if err != nil { - l.Fatalf("Failed to set up OpenTelemetry: %s.", err) - } - - l.Infof("OpenTelemetry is enabled. OTLP exporter endpoint: %s.", cli.Test.OTLPEndpoint) - - return shutdown -} - // checkFlags checks that CLI flags are not self-contradictory. func checkFlags(logger *zap.Logger) { l := logger.Sugar() @@ -420,20 +398,6 @@ func run() { stop() }() - otelShutdown := setupTracer(logger) - - if otelShutdown != nil { - defer func() { - shutdownCtx, shutdownCancel := ctxutil.WithDelay(ctx.Done(), 3*time.Second) //nolint:mnd // simple shutdown timeout - - if err := otelShutdown(shutdownCtx); err != nil { - logger.Sugar().Errorf("Failed to shutdown OpenTelemetry: %s.", err) - } - - shutdownCancel() - }() - } - var wg sync.WaitGroup if cli.DebugAddr != "" && cli.DebugAddr != "-" { @@ -448,16 +412,15 @@ func run() { if cli.Test.OTLPEndpoint != "" { wg.Add(1) + otelConf := observability.OtelConfig{ + Service: "ferretdb", + Version: version.Get().Version, + Endpoint: cli.Test.OTLPEndpoint, + } + go func() { defer wg.Done() - - otelConf := observability.OtelConfig{ - Service: "ferretdb", - Version: version.Get().Version, - Endpoint: cli.Test.OTLPEndpoint, - } - - observability.RunOtel(ctx, otelConf, logger.Named("otel")) + observability.RunOtel(ctx, otelConf, logger.Named("otel").Sugar()) }() } diff --git a/integration/setup/startup.go b/integration/setup/startup.go index ed5eb069d328..dc7ddecb7bb8 100644 --- a/integration/setup/startup.go +++ b/integration/setup/startup.go @@ -22,24 +22,20 @@ import ( "strconv" "time" + "github.com/FerretDB/FerretDB/internal/util/observability" + "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" + "github.com/FerretDB/FerretDB/integration/shareddata" "github.com/FerretDB/FerretDB/internal/clientconn/connmetrics" "github.com/FerretDB/FerretDB/internal/util/debug" "github.com/FerretDB/FerretDB/internal/util/logging" - "github.com/FerretDB/FerretDB/internal/util/must" - "github.com/FerretDB/FerretDB/internal/util/observability" - - "github.com/FerretDB/FerretDB/integration/shareddata" ) // listenerMetrics are shared between tests. var listenerMetrics = connmetrics.NewListenerMetrics() -// shutdownOtel is a function that stops OpenTelemetry provider. -var shutdownOtel func(context.Context) error - // Startup initializes things that should be initialized only once. func Startup() { logging.Setup(zap.DebugLevel, "console", "") @@ -55,6 +51,13 @@ func Startup() { // use any available port to allow running different configurations in parallel go debug.RunHandler(context.Background(), "127.0.0.1:0", prometheus.DefaultRegisterer, zap.L().Named("debug")) + otelConf := observability.OtelConfig{ + Service: "integration-tests", + Endpoint: "127.0.0.1:4318", + } + + go observability.RunOtel(context.Background(), otelConf, zap.L().Named("otel").Sugar()) + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() @@ -117,20 +120,10 @@ func Startup() { } else { zap.S().Infof("Compat system: none, compatibility tests will be skipped.") } - - shutdownOtel = must.NotFail(observability.SetupOtel(observability.Config{ - Service: "integration-tests", - Endpoint: "127.0.0.1:4318", - })) } // Shutdown cleans up after all tests. func Shutdown() { - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - must.NoError(shutdownOtel(ctx)) - // to increase a chance of resource finalizers to spot problems runtime.GC() runtime.GC() diff --git a/internal/util/observability/observability.go b/internal/util/observability/observability.go index 81faf417f5dc..261d3537cf92 100644 --- a/internal/util/observability/observability.go +++ b/internal/util/observability/observability.go @@ -22,11 +22,12 @@ import ( "time" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace" "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp" otelsdkresource "go.opentelemetry.io/otel/sdk/resource" otelsdktrace "go.opentelemetry.io/otel/sdk/trace" otelsemconv "go.opentelemetry.io/otel/semconv/v1.21.0" - zap "go.uber.org/zap" + "go.uber.org/zap" ) // OtelConfig is the configuration for OpenTelemetry. @@ -37,13 +38,14 @@ type OtelConfig struct { } // RunOtel runs the OpenTelemetry system with the given configuration. -func RunOtel(ctx context.Context, config OtelConfig, l *zap.Logger) { - logger := l.Sugar() +func RunOtel(ctx context.Context, config OtelConfig, logger *zap.SugaredLogger) { + var exporter *otlptrace.Exporter + var err error // Exporter and tracer are configured with the particular params on purpose. // We don't want to let them being set through OTEL_* environment variables, // but we set them explicitly. - exporter, err := otlptracehttp.New( + exporter, err = otlptracehttp.New( context.TODO(), otlptracehttp.WithEndpoint(config.Endpoint), otlptracehttp.WithInsecure(), @@ -66,10 +68,10 @@ func RunOtel(ctx context.Context, config OtelConfig, l *zap.Logger) { <-ctx.Done() - stopCtx, stopCancel := context.WithTimeout(context.Background(), 3*time.Second) + stopCtx, stopCancel := context.WithTimeout(context.Background(), 3*time.Second) //nolint:mnd // Simple timeout defer stopCancel() - if err := tp.Shutdown(stopCtx); err != nil { + if err = tp.Shutdown(stopCtx); err != nil { logger.Errorf("Error while shutdown OpenTelemetry system: %v", err) return } From 2d2c1faa5c79699e5cf296a306e6aec2a424a4b1 Mon Sep 17 00:00:00 2001 From: Elena Grahovac Date: Thu, 27 Jun 2024 17:38:03 +0200 Subject: [PATCH 16/30] wip --- integration/setup/startup.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/integration/setup/startup.go b/integration/setup/startup.go index dc7ddecb7bb8..c1c555ce67fe 100644 --- a/integration/setup/startup.go +++ b/integration/setup/startup.go @@ -22,15 +22,15 @@ import ( "strconv" "time" - "github.com/FerretDB/FerretDB/internal/util/observability" - "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" - "github.com/FerretDB/FerretDB/integration/shareddata" "github.com/FerretDB/FerretDB/internal/clientconn/connmetrics" "github.com/FerretDB/FerretDB/internal/util/debug" "github.com/FerretDB/FerretDB/internal/util/logging" + "github.com/FerretDB/FerretDB/internal/util/observability" + + "github.com/FerretDB/FerretDB/integration/shareddata" ) // listenerMetrics are shared between tests. From 4d252a55e6502fd7ea939d30bdaeda1fe80055da Mon Sep 17 00:00:00 2001 From: Elena Grahovac Date: Thu, 27 Jun 2024 17:50:05 +0200 Subject: [PATCH 17/30] wip --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index e00a24511c0d..d1abde4a785e 100644 --- a/go.mod +++ b/go.mod @@ -25,6 +25,7 @@ require ( github.com/xdg-go/stringprep v1.0.4 go.mongodb.org/mongo-driver v1.15.1 go.opentelemetry.io/otel v1.27.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.27.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.27.0 go.opentelemetry.io/otel/sdk v1.27.0 go.opentelemetry.io/otel/trace v1.27.0 @@ -61,7 +62,6 @@ require ( github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/xdg-go/pbkdf2 v1.0.0 // indirect github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d // indirect - go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.27.0 // indirect go.opentelemetry.io/otel/metric v1.27.0 // indirect go.opentelemetry.io/proto/otlp v1.2.0 // indirect go.uber.org/multierr v1.10.0 // indirect From 84dbce4290ecfc5c73db0c416fd97f26e75a82ba Mon Sep 17 00:00:00 2001 From: Elena Grahovac Date: Fri, 28 Jun 2024 11:34:25 +0200 Subject: [PATCH 18/30] wip --- Taskfile.yml | 1 + internal/util/observability/observability.go | 2 ++ 2 files changed, 3 insertions(+) diff --git a/Taskfile.yml b/Taskfile.yml index 6991eca70f64..00e9f308bf2f 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -364,6 +364,7 @@ tasks: --handler=pg --postgresql-url='postgres://username@127.0.0.1:5432/ferretdb?search_path=' --test-records-dir=tmp/records + --test-otlp-endpoint=127.0.0.1:4318 run-sqlite: desc: "Run FerretDB with `sqlite` backend" diff --git a/internal/util/observability/observability.go b/internal/util/observability/observability.go index 261d3537cf92..3e7866dd94e7 100644 --- a/internal/util/observability/observability.go +++ b/internal/util/observability/observability.go @@ -66,6 +66,8 @@ func RunOtel(ctx context.Context, config OtelConfig, logger *zap.SugaredLogger) otel.SetTracerProvider(tp) + logger.Infof("OpenTelemetry system started successfully, exporter endpoint is set as %s", config.Endpoint) + <-ctx.Done() stopCtx, stopCancel := context.WithTimeout(context.Background(), 3*time.Second) //nolint:mnd // Simple timeout From bfad76afcdcaf0f521dadcc080ce7ece29a7d01e Mon Sep 17 00:00:00 2001 From: Elena Grahovac Date: Fri, 28 Jun 2024 11:53:51 +0200 Subject: [PATCH 19/30] wip --- internal/util/observability/observability.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/util/observability/observability.go b/internal/util/observability/observability.go index 3e7866dd94e7..4a92b7f2e906 100644 --- a/internal/util/observability/observability.go +++ b/internal/util/observability/observability.go @@ -51,7 +51,7 @@ func RunOtel(ctx context.Context, config OtelConfig, logger *zap.SugaredLogger) otlptracehttp.WithInsecure(), ) if err != nil { - logger.Errorf("Failed to create OTLP exporter: %s. OpenTelemetry won't be used.", err) + logger.Errorf("Failed to create OTLP exporter: %v. OpenTelemetry won't be used.", err) return } From 1b6b9efa01ab028c548642dfde32256f9a3889ff Mon Sep 17 00:00:00 2001 From: Elena Grahovac Date: Fri, 28 Jun 2024 22:09:22 +0200 Subject: [PATCH 20/30] current date (without integration tests) --- cmd/envtool/tests.go | 7 +++- cmd/ferretdb/main.go | 20 ++++++++---- integration/setup/startup.go | 9 ++++-- internal/util/observability/observability.go | 34 +++++++++++++++----- 4 files changed, 53 insertions(+), 17 deletions(-) diff --git a/cmd/envtool/tests.go b/cmd/envtool/tests.go index a6a6bb313779..4b5ff419bd8a 100644 --- a/cmd/envtool/tests.go +++ b/cmd/envtool/tests.go @@ -96,7 +96,12 @@ func runGoTest(ctx context.Context, args []string, total int, times bool, logger Endpoint: "127.0.0.1:4318", } - go observability.RunOtel(ctx, otelConf, logger) + ot, err := observability.NewOtel(&otelConf, logger.Desugar()) + if err != nil { + return lazyerrors.Error(err) + } + + go ot.Run(ctx) cmd := exec.CommandContext(ctx, "go", append([]string{"test", "-json"}, args...)...) diff --git a/cmd/ferretdb/main.go b/cmd/ferretdb/main.go index 7fbff65e6891..34659e5dd491 100644 --- a/cmd/ferretdb/main.go +++ b/cmd/ferretdb/main.go @@ -410,18 +410,26 @@ func run() { } if cli.Test.OTLPEndpoint != "" { - wg.Add(1) - otelConf := observability.OtelConfig{ Service: "ferretdb", Version: version.Get().Version, Endpoint: cli.Test.OTLPEndpoint, } - go func() { - defer wg.Done() - observability.RunOtel(ctx, otelConf, logger.Named("otel").Sugar()) - }() + otLogger := logger.Named("otel") + + ot, err := observability.NewOtel(&otelConf, otLogger) + if err != nil { + otLogger.Error("Failed to create OpenTelemetry system.", zap.Error(err)) + stop() + } else { + wg.Add(1) + + go func() { + defer wg.Done() + ot.Run(ctx) + }() + } } metrics := connmetrics.NewListenerMetrics() diff --git a/integration/setup/startup.go b/integration/setup/startup.go index c1c555ce67fe..b7758fd91911 100644 --- a/integration/setup/startup.go +++ b/integration/setup/startup.go @@ -56,9 +56,14 @@ func Startup() { Endpoint: "127.0.0.1:4318", } - go observability.RunOtel(context.Background(), otelConf, zap.L().Named("otel").Sugar()) + ot, err := observability.NewOtel(&otelConf, zap.L().Named("otel")) + if err != nil { + zap.S().Fatal(err) + } + + go ot.Run(context.Background()) - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + ctx, cancel := context.WithTimeout(context.TODO(), 5*time.Second) defer cancel() // do basic flags validation earlier, before all tests diff --git a/internal/util/observability/observability.go b/internal/util/observability/observability.go index 4a92b7f2e906..c7862628d36f 100644 --- a/internal/util/observability/observability.go +++ b/internal/util/observability/observability.go @@ -19,6 +19,7 @@ package observability import ( "context" + "errors" "time" "go.opentelemetry.io/otel" @@ -37,8 +38,18 @@ type OtelConfig struct { Endpoint string } -// RunOtel runs the OpenTelemetry system with the given configuration. -func RunOtel(ctx context.Context, config OtelConfig, logger *zap.SugaredLogger) { +// Otel represents the OpenTelemetry system. +type Otel struct { + l *zap.Logger + tp *otelsdktrace.TracerProvider +} + +// NewOtel sets up OTLP exporter and tracer provider. +func NewOtel(config *OtelConfig, l *zap.Logger) (*Otel, error) { + if config.Endpoint == "" { + return nil, errors.New("endpoint is required") + } + var exporter *otlptrace.Exporter var err error @@ -51,8 +62,7 @@ func RunOtel(ctx context.Context, config OtelConfig, logger *zap.SugaredLogger) otlptracehttp.WithInsecure(), ) if err != nil { - logger.Errorf("Failed to create OTLP exporter: %v. OpenTelemetry won't be used.", err) - return + return nil, err } tp := otelsdktrace.NewTracerProvider( @@ -66,17 +76,25 @@ func RunOtel(ctx context.Context, config OtelConfig, logger *zap.SugaredLogger) otel.SetTracerProvider(tp) - logger.Infof("OpenTelemetry system started successfully, exporter endpoint is set as %s", config.Endpoint) + return &Otel{ + l: l, + tp: tp, + }, nil +} + +// Run runs the OpenTelemetry system. +func (o *Otel) Run(ctx context.Context) { + o.l.Info("OpenTelemetry system started successfully.") <-ctx.Done() stopCtx, stopCancel := context.WithTimeout(context.Background(), 3*time.Second) //nolint:mnd // Simple timeout defer stopCancel() - if err = tp.Shutdown(stopCtx); err != nil { - logger.Errorf("Error while shutdown OpenTelemetry system: %v", err) + if err := o.tp.Shutdown(stopCtx); err != nil { + o.l.Error("Error while shutdown OpenTelemetry system.", zap.Error(err)) return } - logger.Info("OpenTelemetry system stopped successfully.") + o.l.Info("OpenTelemetry system stopped successfully.") } From 0164d9bde1a745ba73a6f266e90b76a14bd1b371 Mon Sep 17 00:00:00 2001 From: Elena Grahovac Date: Fri, 28 Jun 2024 22:10:32 +0200 Subject: [PATCH 21/30] typo --- integration/setup/startup.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integration/setup/startup.go b/integration/setup/startup.go index b7758fd91911..fd6a552d1a60 100644 --- a/integration/setup/startup.go +++ b/integration/setup/startup.go @@ -61,9 +61,9 @@ func Startup() { zap.S().Fatal(err) } - go ot.Run(context.Background()) + go ot.Run(context.TODO()) - ctx, cancel := context.WithTimeout(context.TODO(), 5*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() // do basic flags validation earlier, before all tests From ad44aaa479861af10aca0340890e93b4b0a5d7cd Mon Sep 17 00:00:00 2001 From: Elena Grahovac Date: Mon, 1 Jul 2024 16:45:25 +0200 Subject: [PATCH 22/30] wip --- cmd/envtool/tests.go | 4 +-- cmd/ferretdb/main.go | 4 +-- integration/setup/startup.go | 6 +++++ internal/util/observability/observability.go | 26 ++++++++++---------- 4 files changed, 23 insertions(+), 17 deletions(-) diff --git a/cmd/envtool/tests.go b/cmd/envtool/tests.go index 4b5ff419bd8a..fe7a35971cef 100644 --- a/cmd/envtool/tests.go +++ b/cmd/envtool/tests.go @@ -91,12 +91,12 @@ func resultKey(packageName, testName string) string { // runGoTest runs `go test` with given extra args. func runGoTest(ctx context.Context, args []string, total int, times bool, logger *zap.SugaredLogger) error { - otelConf := observability.OtelConfig{ + otConfig := observability.OtelTracerConfig{ Service: "envtool-tests", Endpoint: "127.0.0.1:4318", } - ot, err := observability.NewOtel(&otelConf, logger.Desugar()) + ot, err := observability.NewOtelTracer(&otConfig, logger.Desugar()) if err != nil { return lazyerrors.Error(err) } diff --git a/cmd/ferretdb/main.go b/cmd/ferretdb/main.go index 34659e5dd491..e9dd922f818a 100644 --- a/cmd/ferretdb/main.go +++ b/cmd/ferretdb/main.go @@ -410,7 +410,7 @@ func run() { } if cli.Test.OTLPEndpoint != "" { - otelConf := observability.OtelConfig{ + otConf := observability.OtelTracerConfig{ Service: "ferretdb", Version: version.Get().Version, Endpoint: cli.Test.OTLPEndpoint, @@ -418,7 +418,7 @@ func run() { otLogger := logger.Named("otel") - ot, err := observability.NewOtel(&otelConf, otLogger) + ot, err := observability.NewOtelTracer(&otConf, otLogger) if err != nil { otLogger.Error("Failed to create OpenTelemetry system.", zap.Error(err)) stop() diff --git a/integration/setup/startup.go b/integration/setup/startup.go index fd6a552d1a60..062f6961d352 100644 --- a/integration/setup/startup.go +++ b/integration/setup/startup.go @@ -36,6 +36,9 @@ import ( // listenerMetrics are shared between tests. var listenerMetrics = connmetrics.NewListenerMetrics() +// shutdownOtel is a function that stops OpenTelemetry's tracer provider. +var shutdownOtel context.CancelFunc + // Startup initializes things that should be initialized only once. func Startup() { logging.Setup(zap.DebugLevel, "console", "") @@ -129,6 +132,9 @@ func Startup() { // Shutdown cleans up after all tests. func Shutdown() { + shutdownOtel() + // TODO how do we check that shutdown is complete? + // to increase a chance of resource finalizers to spot problems runtime.GC() runtime.GC() diff --git a/internal/util/observability/observability.go b/internal/util/observability/observability.go index c7862628d36f..b2446d0dc9e2 100644 --- a/internal/util/observability/observability.go +++ b/internal/util/observability/observability.go @@ -31,21 +31,21 @@ import ( "go.uber.org/zap" ) -// OtelConfig is the configuration for OpenTelemetry. -type OtelConfig struct { +// OtelTracerConfig is the configuration for OpenTelemetry. +type OtelTracerConfig struct { Service string Version string Endpoint string } -// Otel represents the OpenTelemetry system. -type Otel struct { +// OtelTracer represents the OpenTelemetry tracer. +type OtelTracer struct { l *zap.Logger tp *otelsdktrace.TracerProvider } -// NewOtel sets up OTLP exporter and tracer provider. -func NewOtel(config *OtelConfig, l *zap.Logger) (*Otel, error) { +// NewOtelTracer sets up OTLP tracer provider. +func NewOtelTracer(config *OtelTracerConfig, l *zap.Logger) (*OtelTracer, error) { if config.Endpoint == "" { return nil, errors.New("endpoint is required") } @@ -76,25 +76,25 @@ func NewOtel(config *OtelConfig, l *zap.Logger) (*Otel, error) { otel.SetTracerProvider(tp) - return &Otel{ + return &OtelTracer{ l: l, tp: tp, }, nil } -// Run runs the OpenTelemetry system. -func (o *Otel) Run(ctx context.Context) { - o.l.Info("OpenTelemetry system started successfully.") +// Run runs the OpenTelemetry tracer while the context is open. +func (ot *OtelTracer) Run(ctx context.Context) { + ot.l.Info("OpenTelemetry system started successfully.") <-ctx.Done() stopCtx, stopCancel := context.WithTimeout(context.Background(), 3*time.Second) //nolint:mnd // Simple timeout defer stopCancel() - if err := o.tp.Shutdown(stopCtx); err != nil { - o.l.Error("Error while shutdown OpenTelemetry system.", zap.Error(err)) + if err := ot.tp.Shutdown(stopCtx); err != nil { + ot.l.Error("Error while shutdown OpenTelemetry system.", zap.Error(err)) return } - o.l.Info("OpenTelemetry system stopped successfully.") + ot.l.Info("OpenTelemetry system stopped successfully.") } From d5cc6aa45bf6c335369bc3a3d1e7ccdd594533f5 Mon Sep 17 00:00:00 2001 From: Elena Grahovac Date: Mon, 1 Jul 2024 16:52:12 +0200 Subject: [PATCH 23/30] wip --- cmd/envtool/tests.go | 5 +++-- cmd/ferretdb/main.go | 9 ++++----- integration/setup/startup.go | 5 +++-- internal/util/observability/observability.go | 18 ++++++++++-------- 4 files changed, 20 insertions(+), 17 deletions(-) diff --git a/cmd/envtool/tests.go b/cmd/envtool/tests.go index fe7a35971cef..cd49bb02a0fd 100644 --- a/cmd/envtool/tests.go +++ b/cmd/envtool/tests.go @@ -91,12 +91,13 @@ func resultKey(packageName, testName string) string { // runGoTest runs `go test` with given extra args. func runGoTest(ctx context.Context, args []string, total int, times bool, logger *zap.SugaredLogger) error { - otConfig := observability.OtelTracerConfig{ + otOpts := observability.OtelTracerOpts{ Service: "envtool-tests", Endpoint: "127.0.0.1:4318", + Logger: logger.Desugar(), } - ot, err := observability.NewOtelTracer(&otConfig, logger.Desugar()) + ot, err := observability.NewOtelTracer(&otOpts) if err != nil { return lazyerrors.Error(err) } diff --git a/cmd/ferretdb/main.go b/cmd/ferretdb/main.go index e9dd922f818a..d59888bdd73a 100644 --- a/cmd/ferretdb/main.go +++ b/cmd/ferretdb/main.go @@ -410,17 +410,16 @@ func run() { } if cli.Test.OTLPEndpoint != "" { - otConf := observability.OtelTracerConfig{ + otOpts := observability.OtelTracerOpts{ Service: "ferretdb", Version: version.Get().Version, Endpoint: cli.Test.OTLPEndpoint, + Logger: logger.Named("otel"), } - otLogger := logger.Named("otel") - - ot, err := observability.NewOtelTracer(&otConf, otLogger) + ot, err := observability.NewOtelTracer(&otOpts) if err != nil { - otLogger.Error("Failed to create OpenTelemetry system.", zap.Error(err)) + otOpts.Logger.Error("Failed to create OpenTelemetry system.", zap.Error(err)) stop() } else { wg.Add(1) diff --git a/integration/setup/startup.go b/integration/setup/startup.go index 062f6961d352..5ab115d7e93a 100644 --- a/integration/setup/startup.go +++ b/integration/setup/startup.go @@ -54,12 +54,13 @@ func Startup() { // use any available port to allow running different configurations in parallel go debug.RunHandler(context.Background(), "127.0.0.1:0", prometheus.DefaultRegisterer, zap.L().Named("debug")) - otelConf := observability.OtelConfig{ + otOpts := observability.OtelTracerOpts{ Service: "integration-tests", Endpoint: "127.0.0.1:4318", + Logger: zap.L().Named("otel"), } - ot, err := observability.NewOtel(&otelConf, zap.L().Named("otel")) + ot, err := observability.NewOtelTracer(&otOpts) if err != nil { zap.S().Fatal(err) } diff --git a/internal/util/observability/observability.go b/internal/util/observability/observability.go index b2446d0dc9e2..a6e4b5a7ffc7 100644 --- a/internal/util/observability/observability.go +++ b/internal/util/observability/observability.go @@ -31,8 +31,10 @@ import ( "go.uber.org/zap" ) -// OtelTracerConfig is the configuration for OpenTelemetry. -type OtelTracerConfig struct { +// OtelTracerOpts is the configuration for OpenTelemetry. +type OtelTracerOpts struct { + Logger *zap.Logger + Service string Version string Endpoint string @@ -45,8 +47,8 @@ type OtelTracer struct { } // NewOtelTracer sets up OTLP tracer provider. -func NewOtelTracer(config *OtelTracerConfig, l *zap.Logger) (*OtelTracer, error) { - if config.Endpoint == "" { +func NewOtelTracer(opts *OtelTracerOpts) (*OtelTracer, error) { + if opts.Endpoint == "" { return nil, errors.New("endpoint is required") } @@ -58,7 +60,7 @@ func NewOtelTracer(config *OtelTracerConfig, l *zap.Logger) (*OtelTracer, error) // but we set them explicitly. exporter, err = otlptracehttp.New( context.TODO(), - otlptracehttp.WithEndpoint(config.Endpoint), + otlptracehttp.WithEndpoint(opts.Endpoint), otlptracehttp.WithInsecure(), ) if err != nil { @@ -69,15 +71,15 @@ func NewOtelTracer(config *OtelTracerConfig, l *zap.Logger) (*OtelTracer, error) otelsdktrace.WithBatcher(exporter, otelsdktrace.WithBatchTimeout(time.Second)), otelsdktrace.WithSampler(otelsdktrace.AlwaysSample()), otelsdktrace.WithResource(otelsdkresource.NewSchemaless( - otelsemconv.ServiceName(config.Service), - otelsemconv.ServiceVersion(config.Version), + otelsemconv.ServiceName(opts.Service), + otelsemconv.ServiceVersion(opts.Version), )), ) otel.SetTracerProvider(tp) return &OtelTracer{ - l: l, + l: opts.Logger, tp: tp, }, nil } From dcb098e22290483567f7b4bb3f08a5229e205cc3 Mon Sep 17 00:00:00 2001 From: Elena Grahovac Date: Mon, 1 Jul 2024 17:00:50 +0200 Subject: [PATCH 24/30] wip --- integration/setup/startup.go | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/integration/setup/startup.go b/integration/setup/startup.go index 5ab115d7e93a..02573bc88c5f 100644 --- a/integration/setup/startup.go +++ b/integration/setup/startup.go @@ -39,6 +39,9 @@ var listenerMetrics = connmetrics.NewListenerMetrics() // shutdownOtel is a function that stops OpenTelemetry's tracer provider. var shutdownOtel context.CancelFunc +// otelDone is a channel that is closed when OpenTelemetry's tracer provider is stopped. +var otelDone = make(chan struct{}) + // Startup initializes things that should be initialized only once. func Startup() { logging.Setup(zap.DebugLevel, "console", "") @@ -65,7 +68,10 @@ func Startup() { zap.S().Fatal(err) } - go ot.Run(context.TODO()) + go func() { + ot.Run(context.TODO()) + otelDone <- struct{}{} + }() ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() @@ -134,7 +140,16 @@ func Startup() { // Shutdown cleans up after all tests. func Shutdown() { shutdownOtel() - // TODO how do we check that shutdown is complete? + + t := time.NewTimer(3 * time.Second) + defer t.Stop() + + select { + case <-otelDone: + // do nothing + case <-t.C: + zap.S().Warn("OpenTelemetry system shutdown timeout.") + } // to increase a chance of resource finalizers to spot problems runtime.GC() From bbab99ffe2fa891cdbf7d7a3644d757bc434d543 Mon Sep 17 00:00:00 2001 From: Alexey Palazhchenko Date: Mon, 1 Jul 2024 19:47:14 +0400 Subject: [PATCH 25/30] Refactor --- internal/util/observability/observability.go | 52 ++++++++++++-------- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/internal/util/observability/observability.go b/internal/util/observability/observability.go index a6e4b5a7ffc7..1f31d602e54d 100644 --- a/internal/util/observability/observability.go +++ b/internal/util/observability/observability.go @@ -20,10 +20,11 @@ package observability import ( "context" "errors" + "sync/atomic" "time" + "github.com/FerretDB/FerretDB/internal/util/ctxutil" "go.opentelemetry.io/otel" - "go.opentelemetry.io/otel/exporters/otlp/otlptrace" "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp" otelsdkresource "go.opentelemetry.io/otel/sdk/resource" otelsdktrace "go.opentelemetry.io/otel/sdk/trace" @@ -31,7 +32,16 @@ import ( "go.uber.org/zap" ) -// OtelTracerOpts is the configuration for OpenTelemetry. +// setup ensures that OTLP tracer is set up only once. +var setup atomic.Bool + +// OtelTracer represents the OTLP tracer. +type OtelTracer struct { + l *zap.Logger + tp *otelsdktrace.TracerProvider +} + +// OtelTracerOpts is the configuration for OtelTracer. type OtelTracerOpts struct { Logger *zap.Logger @@ -40,25 +50,21 @@ type OtelTracerOpts struct { Endpoint string } -// OtelTracer represents the OpenTelemetry tracer. -type OtelTracer struct { - l *zap.Logger - tp *otelsdktrace.TracerProvider -} - -// NewOtelTracer sets up OTLP tracer provider. +// NewOtelTracer sets up OTLP tracer. func NewOtelTracer(opts *OtelTracerOpts) (*OtelTracer, error) { + if setup.Swap(true) { + panic("OTLP tracer is already set up") + } + if opts.Endpoint == "" { return nil, errors.New("endpoint is required") } - var exporter *otlptrace.Exporter - var err error - // Exporter and tracer are configured with the particular params on purpose. // We don't want to let them being set through OTEL_* environment variables, // but we set them explicitly. - exporter, err = otlptracehttp.New( + + exporter, err := otlptracehttp.New( context.TODO(), otlptracehttp.WithEndpoint(opts.Endpoint), otlptracehttp.WithInsecure(), @@ -84,19 +90,23 @@ func NewOtelTracer(opts *OtelTracerOpts) (*OtelTracer, error) { }, nil } -// Run runs the OpenTelemetry tracer while the context is open. +// Run runs OTLP tracer until ctx is canceled. func (ot *OtelTracer) Run(ctx context.Context) { - ot.l.Info("OpenTelemetry system started successfully.") + ot.l.Info("OTLP tracer started successfully.") <-ctx.Done() - stopCtx, stopCancel := context.WithTimeout(context.Background(), 3*time.Second) //nolint:mnd // Simple timeout - defer stopCancel() + // ctx is already canceled, but we want to inherit its values + shutdownCtx, shutdownCancel := ctxutil.WithDelay(ctx) + defer shutdownCancel(nil) + + if err := ot.tp.ForceFlush(shutdownCtx); err != nil { + ot.l.DPanic("ForceFlush exited with unexpected error", zap.Error(err)) + } - if err := ot.tp.Shutdown(stopCtx); err != nil { - ot.l.Error("Error while shutdown OpenTelemetry system.", zap.Error(err)) - return + if err := ot.tp.Shutdown(shutdownCtx); err != nil { + ot.l.DPanic("Shutdown exited with unexpected error", zap.Error(err)) } - ot.l.Info("OpenTelemetry system stopped successfully.") + ot.l.Info("OTLP tracer stopped.") } From c575e05da625d005dbb87d0cdd1d2520c6677c15 Mon Sep 17 00:00:00 2001 From: Alexey Palazhchenko Date: Mon, 1 Jul 2024 19:54:17 +0400 Subject: [PATCH 26/30] Refactor --- cmd/envtool/tests.go | 8 ++--- cmd/ferretdb/main.go | 36 ++++++++++---------- integration/setup/startup.go | 7 ++-- internal/util/observability/observability.go | 3 +- 4 files changed, 26 insertions(+), 28 deletions(-) diff --git a/cmd/envtool/tests.go b/cmd/envtool/tests.go index cd49bb02a0fd..c56962d12bfe 100644 --- a/cmd/envtool/tests.go +++ b/cmd/envtool/tests.go @@ -91,13 +91,11 @@ func resultKey(packageName, testName string) string { // runGoTest runs `go test` with given extra args. func runGoTest(ctx context.Context, args []string, total int, times bool, logger *zap.SugaredLogger) error { - otOpts := observability.OtelTracerOpts{ + ot, err := observability.NewOtelTracer(&observability.OtelTracerOpts{ + Logger: logger.Desugar(), Service: "envtool-tests", Endpoint: "127.0.0.1:4318", - Logger: logger.Desugar(), - } - - ot, err := observability.NewOtelTracer(&otOpts) + }) if err != nil { return lazyerrors.Error(err) } diff --git a/cmd/ferretdb/main.go b/cmd/ferretdb/main.go index 90cc5b4f63df..d344587501fd 100644 --- a/cmd/ferretdb/main.go +++ b/cmd/ferretdb/main.go @@ -411,25 +411,25 @@ func run() { } if cli.Test.OTLPEndpoint != "" { - otOpts := observability.OtelTracerOpts{ - Service: "ferretdb", - Version: version.Get().Version, - Endpoint: cli.Test.OTLPEndpoint, - Logger: logger.Named("otel"), - } + wg.Add(1) - ot, err := observability.NewOtelTracer(&otOpts) - if err != nil { - otOpts.Logger.Error("Failed to create OpenTelemetry system.", zap.Error(err)) - stop() - } else { - wg.Add(1) - - go func() { - defer wg.Done() - ot.Run(ctx) - }() - } + go func() { + defer wg.Done() + + l := logger.Named("otel") + + ot, err := observability.NewOtelTracer(&observability.OtelTracerOpts{ + Logger: l, + Service: "ferretdb", + Version: version.Get().Version, + Endpoint: cli.Test.OTLPEndpoint, + }) + if err != nil { + l.Sugar().Fatalf("Failed to create Otel tracer: %s.", err) + } + + ot.Run(ctx) + }() } metrics := connmetrics.NewListenerMetrics() diff --git a/integration/setup/startup.go b/integration/setup/startup.go index 158e319fce1b..fe41c925554e 100644 --- a/integration/setup/startup.go +++ b/integration/setup/startup.go @@ -65,12 +65,11 @@ func Startup() { zap.S().Fatalf("Failed to create debug handler: %s.", err) } - otOpts := observability.OtelTracerOpts{ + ot, err := observability.NewOtelTracer(&observability.OtelTracerOpts{ + Logger: zap.L().Named("otel"), Service: "integration-tests", Endpoint: "127.0.0.1:4318", - Logger: zap.L().Named("otel"), - } - ot, err := observability.NewOtelTracer(&otOpts) + }) if err != nil { zap.S().Fatalf("Failed to create Otel tracer: %s.", err) } diff --git a/internal/util/observability/observability.go b/internal/util/observability/observability.go index 1f31d602e54d..b7e18535a729 100644 --- a/internal/util/observability/observability.go +++ b/internal/util/observability/observability.go @@ -23,13 +23,14 @@ import ( "sync/atomic" "time" - "github.com/FerretDB/FerretDB/internal/util/ctxutil" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp" otelsdkresource "go.opentelemetry.io/otel/sdk/resource" otelsdktrace "go.opentelemetry.io/otel/sdk/trace" otelsemconv "go.opentelemetry.io/otel/semconv/v1.21.0" "go.uber.org/zap" + + "github.com/FerretDB/FerretDB/internal/util/ctxutil" ) // setup ensures that OTLP tracer is set up only once. From 9b87a427503465aa3b1bd1615687aee94b7de605 Mon Sep 17 00:00:00 2001 From: Alexey Palazhchenko Date: Mon, 1 Jul 2024 19:55:39 +0400 Subject: [PATCH 27/30] `task init` --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index d1abde4a785e..e00a24511c0d 100644 --- a/go.mod +++ b/go.mod @@ -25,7 +25,6 @@ require ( github.com/xdg-go/stringprep v1.0.4 go.mongodb.org/mongo-driver v1.15.1 go.opentelemetry.io/otel v1.27.0 - go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.27.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.27.0 go.opentelemetry.io/otel/sdk v1.27.0 go.opentelemetry.io/otel/trace v1.27.0 @@ -62,6 +61,7 @@ require ( github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/xdg-go/pbkdf2 v1.0.0 // indirect github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.27.0 // indirect go.opentelemetry.io/otel/metric v1.27.0 // indirect go.opentelemetry.io/proto/otlp v1.2.0 // indirect go.uber.org/multierr v1.10.0 // indirect From 5fd1d8e97c7cd07f611f566e42c766eab43b549e Mon Sep 17 00:00:00 2001 From: Elena Grahovac Date: Mon, 1 Jul 2024 18:14:37 +0200 Subject: [PATCH 28/30] tracer should be somewhere else --- cmd/envtool/tests.go | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/cmd/envtool/tests.go b/cmd/envtool/tests.go index c56962d12bfe..a53dc2cb5e8c 100644 --- a/cmd/envtool/tests.go +++ b/cmd/envtool/tests.go @@ -40,7 +40,6 @@ import ( "github.com/FerretDB/FerretDB/internal/util/lazyerrors" "github.com/FerretDB/FerretDB/internal/util/must" - "github.com/FerretDB/FerretDB/internal/util/observability" ) // testEvent represents a single even emitted by `go test -json`. @@ -91,17 +90,6 @@ func resultKey(packageName, testName string) string { // runGoTest runs `go test` with given extra args. func runGoTest(ctx context.Context, args []string, total int, times bool, logger *zap.SugaredLogger) error { - ot, err := observability.NewOtelTracer(&observability.OtelTracerOpts{ - Logger: logger.Desugar(), - Service: "envtool-tests", - Endpoint: "127.0.0.1:4318", - }) - if err != nil { - return lazyerrors.Error(err) - } - - go ot.Run(ctx) - cmd := exec.CommandContext(ctx, "go", append([]string{"test", "-json"}, args...)...) logger.Debugf("Running %s", strings.Join(cmd.Args, " ")) From e3b46643d87b5826f81c24e0f58e8d45177505b3 Mon Sep 17 00:00:00 2001 From: Elena Grahovac Date: Mon, 1 Jul 2024 18:23:17 +0200 Subject: [PATCH 29/30] wip --- cmd/envtool/tests.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/cmd/envtool/tests.go b/cmd/envtool/tests.go index a53dc2cb5e8c..be99459083a2 100644 --- a/cmd/envtool/tests.go +++ b/cmd/envtool/tests.go @@ -31,6 +31,8 @@ import ( "strings" "time" + "github.com/FerretDB/FerretDB/internal/util/observability" + "go.opentelemetry.io/otel" otelattribute "go.opentelemetry.io/otel/attribute" otelcodes "go.opentelemetry.io/otel/codes" @@ -376,6 +378,17 @@ func testsRun(ctx context.Context, index, total uint, run, skip string, args []s args = append(args, "-skip="+skip) } + ot, err := observability.NewOtelTracer(&observability.OtelTracerOpts{ + Logger: logger.Desugar(), + Service: "envtool-tests", + Endpoint: "127.0.0.1:4318", + }) + if err != nil { + return lazyerrors.Error(err) + } + + go ot.Run(ctx) + return runGoTest(ctx, args, len(shard), true, logger) } From 22c9399cb3681283a1b489a9b89113f2598ed0c1 Mon Sep 17 00:00:00 2001 From: Elena Grahovac Date: Mon, 1 Jul 2024 18:25:55 +0200 Subject: [PATCH 30/30] liny --- cmd/envtool/tests.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cmd/envtool/tests.go b/cmd/envtool/tests.go index be99459083a2..192eb030f382 100644 --- a/cmd/envtool/tests.go +++ b/cmd/envtool/tests.go @@ -31,8 +31,6 @@ import ( "strings" "time" - "github.com/FerretDB/FerretDB/internal/util/observability" - "go.opentelemetry.io/otel" otelattribute "go.opentelemetry.io/otel/attribute" otelcodes "go.opentelemetry.io/otel/codes" @@ -42,6 +40,7 @@ import ( "github.com/FerretDB/FerretDB/internal/util/lazyerrors" "github.com/FerretDB/FerretDB/internal/util/must" + "github.com/FerretDB/FerretDB/internal/util/observability" ) // testEvent represents a single even emitted by `go test -json`.