Skip to content

Commit

Permalink
Add debug timers to instant and range queries.
Browse files Browse the repository at this point in the history
This adds timers around several query-relevant code blocks. For now, the
query timer stats are only logged for queries initiated through the UI.
In other cases (rule evaluations), the stats are simply thrown away.

My hope is that this helps us understand where queries spend time,
especially in cases where they sometimes hang for unusual amounts of
time.
  • Loading branch information
juliusv committed Jun 5, 2013
1 parent c594d18 commit 51689d9
Show file tree
Hide file tree
Showing 12 changed files with 280 additions and 22 deletions.
3 changes: 2 additions & 1 deletion rules/alerting.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"fmt"
"github.com/prometheus/prometheus/model"
"github.com/prometheus/prometheus/rules/ast"
"github.com/prometheus/prometheus/stats"
"github.com/prometheus/prometheus/storage/metric"
"github.com/prometheus/prometheus/utility"
"time"
Expand Down Expand Up @@ -90,7 +91,7 @@ type AlertingRule struct {
func (rule AlertingRule) Name() string { return rule.name }

func (rule AlertingRule) EvalRaw(timestamp time.Time, storage *metric.TieredStorage) (ast.Vector, error) {
return ast.EvalVectorInstant(rule.vector, timestamp, storage)
return ast.EvalVectorInstant(rule.vector, timestamp, storage, stats.NewTimerGroup())
}

func (rule AlertingRule) Eval(timestamp time.Time, storage *metric.TieredStorage) (ast.Vector, error) {
Expand Down
16 changes: 12 additions & 4 deletions rules/ast/ast.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"errors"
"fmt"
"github.com/prometheus/prometheus/model"
"github.com/prometheus/prometheus/stats"
"github.com/prometheus/prometheus/storage/metric"
"log"
"math"
Expand Down Expand Up @@ -255,25 +256,29 @@ func labelsToKey(labels model.Metric) string {
return strings.Join(keyParts, ",") // TODO not safe when label value contains comma.
}

func EvalVectorInstant(node VectorNode, timestamp time.Time, storage *metric.TieredStorage) (vector Vector, err error) {
viewAdapter, err := viewAdapterForInstantQuery(node, timestamp, storage)
func EvalVectorInstant(node VectorNode, timestamp time.Time, storage *metric.TieredStorage, queryStats *stats.TimerGroup) (vector Vector, err error) {
viewAdapter, err := viewAdapterForInstantQuery(node, timestamp, storage, queryStats)
if err != nil {
return
}
vector = node.Eval(timestamp, viewAdapter)
return
}

func EvalVectorRange(node VectorNode, start time.Time, end time.Time, interval time.Duration, storage *metric.TieredStorage) (Matrix, error) {
func EvalVectorRange(node VectorNode, start time.Time, end time.Time, interval time.Duration, storage *metric.TieredStorage, queryStats *stats.TimerGroup) (Matrix, error) {
// Explicitly initialize to an empty matrix since a nil Matrix encodes to
// null in JSON.
matrix := Matrix{}

viewAdapter, err := viewAdapterForRangeQuery(node, start, end, interval, storage)
viewTimer := queryStats.GetTimer(stats.TotalViewBuildingTime).Start()
viewAdapter, err := viewAdapterForRangeQuery(node, start, end, interval, storage, queryStats)
viewTimer.Stop()
if err != nil {
return nil, err
}

// TODO implement watchdog timer for long-running queries.
evalTimer := queryStats.GetTimer(stats.InnerEvalTime).Start()
sampleSets := map[string]*model.SampleSet{}
for t := start; t.Before(end); t = t.Add(interval) {
vector := node.Eval(t, viewAdapter)
Expand All @@ -293,10 +298,13 @@ func EvalVectorRange(node VectorNode, start time.Time, end time.Time, interval t
}
}
}
evalTimer.Stop()

appendTimer := queryStats.GetTimer(stats.ResultAppendTime).Start()
for _, sampleSet := range sampleSets {
matrix = append(matrix, *sampleSet)
}
appendTimer.Stop()

return matrix, nil
}
Expand Down
12 changes: 11 additions & 1 deletion rules/ast/persistence_adapter.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package ast
import (
"flag"
"github.com/prometheus/prometheus/model"
"github.com/prometheus/prometheus/stats"
"github.com/prometheus/prometheus/storage/metric"
"time"
)
Expand All @@ -39,6 +40,8 @@ type viewAdapter struct {
// The materialized view which contains all timeseries data required for
// executing a query.
view metric.View
// The TimerGroup object in which to capture query timing statistics.
stats *stats.TimerGroup
}

// interpolateSamples interpolates a value at a target time between two
Expand Down Expand Up @@ -105,6 +108,7 @@ func (v *viewAdapter) chooseClosestSample(samples model.Values, timestamp time.T
}

func (v *viewAdapter) GetValueAtTime(fingerprints model.Fingerprints, timestamp time.Time) (samples Vector, err error) {
timer := v.stats.GetTimer(stats.GetValueAtTimeTime).Start()
for _, fingerprint := range fingerprints {
sampleCandidates := v.view.GetValueAtTime(fingerprint, timestamp)
samplePair := v.chooseClosestSample(sampleCandidates, timestamp)
Expand All @@ -120,10 +124,12 @@ func (v *viewAdapter) GetValueAtTime(fingerprints model.Fingerprints, timestamp
})
}
}
timer.Stop()
return samples, err
}

func (v *viewAdapter) GetBoundaryValues(fingerprints model.Fingerprints, interval *model.Interval) (sampleSets []model.SampleSet, err error) {
timer := v.stats.GetTimer(stats.GetBoundaryValuesTime).Start()
for _, fingerprint := range fingerprints {
samplePairs := v.view.GetBoundaryValues(fingerprint, *interval)
if len(samplePairs) == 0 {
Expand All @@ -142,10 +148,12 @@ func (v *viewAdapter) GetBoundaryValues(fingerprints model.Fingerprints, interva
}
sampleSets = append(sampleSets, sampleSet)
}
timer.Stop()
return sampleSets, nil
}

func (v *viewAdapter) GetRangeValues(fingerprints model.Fingerprints, interval *model.Interval) (sampleSets []model.SampleSet, err error) {
timer := v.stats.GetTimer(stats.GetRangeValuesTime).Start()
for _, fingerprint := range fingerprints {
samplePairs := v.view.GetRangeValues(fingerprint, *interval)
if len(samplePairs) == 0 {
Expand All @@ -164,10 +172,11 @@ func (v *viewAdapter) GetRangeValues(fingerprints model.Fingerprints, interval *
}
sampleSets = append(sampleSets, sampleSet)
}
timer.Stop()
return sampleSets, nil
}

func NewViewAdapter(view metric.View, storage *metric.TieredStorage) *viewAdapter {
func NewViewAdapter(view metric.View, storage *metric.TieredStorage, queryStats *stats.TimerGroup) *viewAdapter {
stalenessPolicy := StalenessPolicy{
DeltaAllowance: time.Duration(*defaultStalenessDelta) * time.Second,
}
Expand All @@ -176,5 +185,6 @@ func NewViewAdapter(view metric.View, storage *metric.TieredStorage) *viewAdapte
stalenessPolicy: stalenessPolicy,
storage: storage,
view: view,
stats: queryStats,
}
}
12 changes: 10 additions & 2 deletions rules/ast/printer.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"encoding/json"
"fmt"
"github.com/prometheus/prometheus/model"
"github.com/prometheus/prometheus/stats"
"github.com/prometheus/prometheus/storage/metric"
"github.com/prometheus/prometheus/utility"
"sort"
Expand Down Expand Up @@ -153,15 +154,19 @@ func TypedValueToJSON(data interface{}, typeStr string) string {
return string(dataJSON)
}

func EvalToString(node Node, timestamp time.Time, format OutputFormat, storage *metric.TieredStorage) string {
viewAdapter, err := viewAdapterForInstantQuery(node, timestamp, storage)
func EvalToString(node Node, timestamp time.Time, format OutputFormat, storage *metric.TieredStorage, queryStats *stats.TimerGroup) string {
viewTimer := queryStats.GetTimer(stats.TotalViewBuildingTime).Start()
viewAdapter, err := viewAdapterForInstantQuery(node, timestamp, storage, queryStats)
viewTimer.Stop()
if err != nil {
panic(err)
}

evalTimer := queryStats.GetTimer(stats.InnerEvalTime).Start()
switch node.Type() {
case SCALAR:
scalar := node.(ScalarNode).Eval(timestamp, viewAdapter)
evalTimer.Stop()
switch format {
case TEXT:
return fmt.Sprintf("scalar: %v", scalar)
Expand All @@ -170,6 +175,7 @@ func EvalToString(node Node, timestamp time.Time, format OutputFormat, storage *
}
case VECTOR:
vector := node.(VectorNode).Eval(timestamp, viewAdapter)
evalTimer.Stop()
switch format {
case TEXT:
return vector.String()
Expand All @@ -178,6 +184,7 @@ func EvalToString(node Node, timestamp time.Time, format OutputFormat, storage *
}
case MATRIX:
matrix := node.(MatrixNode).Eval(timestamp, viewAdapter)
evalTimer.Stop()
switch format {
case TEXT:
return matrix.String()
Expand All @@ -186,6 +193,7 @@ func EvalToString(node Node, timestamp time.Time, format OutputFormat, storage *
}
case STRING:
str := node.(StringNode).Eval(timestamp, viewAdapter)
evalTimer.Stop()
switch format {
case TEXT:
return str
Expand Down
29 changes: 23 additions & 6 deletions rules/ast/query_analyzer.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ package ast

import (
"github.com/prometheus/prometheus/model"
"github.com/prometheus/prometheus/stats"
"github.com/prometheus/prometheus/storage/metric"
"log"
"time"
Expand Down Expand Up @@ -95,26 +96,38 @@ func (analyzer *QueryAnalyzer) AnalyzeQueries(node Node) {
}
}

func viewAdapterForInstantQuery(node Node, timestamp time.Time, storage *metric.TieredStorage) (*viewAdapter, error) {
func viewAdapterForInstantQuery(node Node, timestamp time.Time, storage *metric.TieredStorage, queryStats *stats.TimerGroup) (*viewAdapter, error) {
analyzeTimer := queryStats.GetTimer(stats.QueryAnalysisTime).Start()
analyzer := NewQueryAnalyzer(storage)
analyzer.AnalyzeQueries(node)
analyzeTimer.Stop()

requestBuildTimer := queryStats.GetTimer(stats.ViewRequestBuildTime).Start()
viewBuilder := metric.NewViewRequestBuilder()
for fingerprint, rangeDuration := range analyzer.FullRanges {
viewBuilder.GetMetricRange(fingerprint, timestamp.Add(-rangeDuration), timestamp)
}
for fingerprint := range analyzer.IntervalRanges {
viewBuilder.GetMetricAtTime(fingerprint, timestamp)
}
view, err := analyzer.storage.MakeView(viewBuilder, time.Duration(60)*time.Second)
requestBuildTimer.Stop()

buildTimer := queryStats.GetTimer(stats.InnerViewBuildingTime).Start()
view, err := analyzer.storage.MakeView(viewBuilder, time.Duration(60)*time.Second, queryStats)
buildTimer.Stop()
if err != nil {
return nil, err
}
return NewViewAdapter(view, storage), nil
return NewViewAdapter(view, storage, queryStats), nil
}

func viewAdapterForRangeQuery(node Node, start time.Time, end time.Time, interval time.Duration, storage *metric.TieredStorage) (*viewAdapter, error) {
func viewAdapterForRangeQuery(node Node, start time.Time, end time.Time, interval time.Duration, storage *metric.TieredStorage, queryStats *stats.TimerGroup) (*viewAdapter, error) {
analyzeTimer := queryStats.GetTimer(stats.QueryAnalysisTime).Start()
analyzer := NewQueryAnalyzer(storage)
analyzer.AnalyzeQueries(node)
analyzeTimer.Stop()

requestBuildTimer := queryStats.GetTimer(stats.ViewRequestBuildTime).Start()
viewBuilder := metric.NewViewRequestBuilder()
for fingerprint, rangeDuration := range analyzer.FullRanges {
// TODO: we should support GetMetricRangeAtInterval() or similar ops in the view builder.
Expand All @@ -125,9 +138,13 @@ func viewAdapterForRangeQuery(node Node, start time.Time, end time.Time, interva
for fingerprint := range analyzer.IntervalRanges {
viewBuilder.GetMetricAtInterval(fingerprint, start, end, interval)
}
view, err := analyzer.storage.MakeView(viewBuilder, time.Duration(60)*time.Second)
requestBuildTimer.Stop()

buildTimer := queryStats.GetTimer(stats.InnerViewBuildingTime).Start()
view, err := analyzer.storage.MakeView(viewBuilder, time.Duration(60)*time.Second, queryStats)
buildTimer.Stop()
if err != nil {
return nil, err
}
return NewViewAdapter(view, storage), nil
return NewViewAdapter(view, storage, queryStats), nil
}
3 changes: 2 additions & 1 deletion rules/recording.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"fmt"
"github.com/prometheus/prometheus/model"
"github.com/prometheus/prometheus/rules/ast"
"github.com/prometheus/prometheus/stats"
"github.com/prometheus/prometheus/storage/metric"
"time"
)
Expand All @@ -32,7 +33,7 @@ type RecordingRule struct {
func (rule RecordingRule) Name() string { return rule.name }

func (rule RecordingRule) EvalRaw(timestamp time.Time, storage *metric.TieredStorage) (ast.Vector, error) {
return ast.EvalVectorInstant(rule.vector, timestamp, storage)
return ast.EvalVectorInstant(rule.vector, timestamp, storage, stats.NewTimerGroup())
}

func (rule RecordingRule) Eval(timestamp time.Time, storage *metric.TieredStorage) (ast.Vector, error) {
Expand Down
3 changes: 2 additions & 1 deletion rules/rules_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"fmt"
"github.com/prometheus/prometheus/model"
"github.com/prometheus/prometheus/rules/ast"
"github.com/prometheus/prometheus/stats"
"github.com/prometheus/prometheus/storage/metric"
"github.com/prometheus/prometheus/utility/test"
"path"
Expand Down Expand Up @@ -377,7 +378,7 @@ func TestExpressions(t *testing.T) {
t.Errorf("%d. Test should fail, but didn't", i)
}
failed := false
resultStr := ast.EvalToString(testExpr, testEvalTime, ast.TEXT, tieredStorage)
resultStr := ast.EvalToString(testExpr, testEvalTime, ast.TEXT, tieredStorage, stats.NewTimerGroup())
resultLines := strings.Split(resultStr, "\n")

if len(exprTest.output) != len(resultLines) {
Expand Down
81 changes: 81 additions & 0 deletions stats/query_stats.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// Copyright 2013 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package stats

// QueryTiming identifies the code area or functionality in which time is spent
// during a query.
type QueryTiming int

// Query timings.
const (
TotalEvalTime QueryTiming = iota
ResultSortTime
JsonEncodeTime
TotalViewBuildingTime
ViewRequestBuildTime
InnerViewBuildingTime
InnerEvalTime
ResultAppendTime
QueryAnalysisTime
GetValueAtTimeTime
GetBoundaryValuesTime
GetRangeValuesTime
ViewQueueTime
ViewDiskPreparationTime
ViewScanJobsTime
ViewDataExtractionTime
ViewDiskExtractionTime
)

// Return a string represenation of a QueryTiming identifier.
func (s QueryTiming) String() string {
switch s {
case TotalEvalTime:
return "Total eval time"
case ResultSortTime:
return "Result sorting time"
case JsonEncodeTime:
return "JSON encoding time"
case TotalViewBuildingTime:
return "Total view building time"
case ViewRequestBuildTime:
return "View request building time"
case InnerViewBuildingTime:
return "Inner view building time"
case InnerEvalTime:
return "Inner eval time"
case ResultAppendTime:
return "Result append time"
case QueryAnalysisTime:
return "Query analysis time"
case GetValueAtTimeTime:
return "GetValueAtTime() time"
case GetBoundaryValuesTime:
return "GetBoundaryValues() time"
case GetRangeValuesTime:
return "GetRangeValues() time"
case ViewQueueTime:
return "View queue wait time"
case ViewScanJobsTime:
return "View building job scanning time"
case ViewDiskPreparationTime:
return "View building disk preparation time"
case ViewDataExtractionTime:
return "Total view data extraction time"
case ViewDiskExtractionTime:
return "View disk data extraction time"
default:
return "Unknown query timing"
}
}
Loading

0 comments on commit 51689d9

Please sign in to comment.