Skip to content

Commit

Permalink
Add reload timestamps to metrics (kuskoman#99)
Browse files Browse the repository at this point in the history
* Use version sort in add_metrics_to_readme.sh

* kuskoman#93: Adds logstash_stats_pipeline_up metrics

* Add newline to EOF

* Adds reload timestamp metrics

* Fixes kuskoman#96: Last_error field isn't properly defined

* Adds timestamp metrics to test/snapshot/readme

* Remove pipeline up metric

---------

Co-authored-by: Jakub Surdej <jakub.surdej@codahead.com>
  • Loading branch information
excalq and kuskoman authored Apr 21, 2023
1 parent af59030 commit a4321ac
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 4 deletions.
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -304,10 +304,13 @@ Table of exported metrics:
| logstash_stats_pipeline_queue_events_queue_size | counter | Number of events that the queue can accommodate |
| logstash_stats_pipeline_queue_max_size_in_bytes | counter | Maximum size of given queue in bytes. |
| logstash_stats_pipeline_reloads_failures | counter | Number of failed pipeline reloads. |
| logstash_stats_pipeline_reloads_last_failure_timestamp | gauge | Timestamp of last failed pipeline reload. |
| logstash_stats_pipeline_reloads_last_success_timestamp | gauge | Timestamp of last successful pipeline reload. |
| logstash_stats_pipeline_reloads_successes | counter | Number of successful pipeline reloads. |
| logstash_stats_process_cpu_load_average_15m | gauge | Total 15m system load average. |
| logstash_stats_pipeline_up | gauge | Whether the pipeline is up or not. |
| logstash_stats_process_cpu_load_average_1m | gauge | Total 1m system load average. |
| logstash_stats_process_cpu_load_average_5m | gauge | Total 5m system load average. |
| logstash_stats_process_cpu_load_average_15m | gauge | Total 15m system load average. |
| logstash_stats_process_cpu_percent | gauge | CPU usage of the process. |
| logstash_stats_process_cpu_total_millis | gauge | Total CPU time used by the process. |
| logstash_stats_process_max_file_descriptors | gauge | Limit of open file descriptors. |
Expand Down
2 changes: 2 additions & 0 deletions collectors/nodestats/nodestats_collector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ func TestCollectNotNil(t *testing.T) {
"logstash_stats_pipeline_queue_max_size_in_bytes",
"logstash_stats_pipeline_reloads_failures",
"logstash_stats_pipeline_reloads_successes",
"logstash_stats_pipeline_reloads_last_success_timestamp",
"logstash_stats_pipeline_reloads_last_failure_timestamp",
"logstash_stats_process_cpu_percent",
"logstash_stats_process_cpu_total_millis",
"logstash_stats_process_cpu_load_average_1m",
Expand Down
12 changes: 10 additions & 2 deletions collectors/nodestats/pipeline_subcollector.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ func NewPipelineSubcollector() *PipelineSubcollector {
ReloadsSuccesses: descHelper.NewDescWithHelpAndLabel("reloads_successes", "Number of successful pipeline reloads.", "pipeline_id"),
ReloadsFailures: descHelper.NewDescWithHelpAndLabel("reloads_failures", "Number of failed pipeline reloads.", "pipeline_id"),

ReloadsLastSuccessTimestamp: descHelper.NewDescWithHelpAndLabel("reloads_last_success_timestamp", "Timestamp of last successful pipeline reload.", "pipeline_id"),
ReloadsLastFailureTimestamp: descHelper.NewDescWithHelpAndLabel("reloads_last_failure_timestamp", "Timestamp of last failed pipeline reload.", "pipeline_id"),

QueueEventsCount: descHelper.NewDescWithHelpAndLabel("queue_events_count", "Number of events in the queue.", "pipeline_id"),
QueueEventsQueueSize: descHelper.NewDescWithHelpAndLabel("queue_events_queue_size", "Number of events that the queue can accommodate", "pipeline_id"),
QueueMaxQueueSizeInBytes: descHelper.NewDescWithHelpAndLabel("queue_max_size_in_bytes", "Maximum size of given queue in bytes.", "pipeline_id"),
Expand All @@ -59,11 +62,16 @@ func (collector *PipelineSubcollector) Collect(pipeStats *responses.SinglePipeli
ch <- prometheus.MustNewConstMetric(collector.EventsDuration, prometheus.CounterValue, float64(pipeStats.Events.DurationInMillis), pipelineID)
ch <- prometheus.MustNewConstMetric(collector.EventsQueuePushDuration, prometheus.CounterValue, float64(pipeStats.Events.QueuePushDurationInMillis), pipelineID)

// todo: add restart timestamps

ch <- prometheus.MustNewConstMetric(collector.ReloadsSuccesses, prometheus.CounterValue, float64(pipeStats.Reloads.Successes), pipelineID)
ch <- prometheus.MustNewConstMetric(collector.ReloadsFailures, prometheus.CounterValue, float64(pipeStats.Reloads.Failures), pipelineID)

if pipeStats.Reloads.LastSuccessTimestamp != nil {
ch <- prometheus.NewMetricWithTimestamp(*pipeStats.Reloads.LastSuccessTimestamp, prometheus.MustNewConstMetric(collector.ReloadsLastSuccessTimestamp, prometheus.GaugeValue, 1, pipelineID))
}
if pipeStats.Reloads.LastFailureTimestamp != nil {
ch <- prometheus.NewMetricWithTimestamp(*pipeStats.Reloads.LastFailureTimestamp, prometheus.MustNewConstMetric(collector.ReloadsLastFailureTimestamp, prometheus.GaugeValue, 1, pipelineID))
}

ch <- prometheus.MustNewConstMetric(collector.QueueEventsCount, prometheus.CounterValue, float64(pipeStats.Queue.EventsCount), pipelineID)
ch <- prometheus.MustNewConstMetric(collector.QueueEventsQueueSize, prometheus.CounterValue, float64(pipeStats.Queue.QueueSizeInBytes), pipelineID)
ch <- prometheus.MustNewConstMetric(collector.QueueMaxQueueSizeInBytes, prometheus.CounterValue, float64(pipeStats.Queue.MaxQueueSizeInBytes), pipelineID)
Expand Down
2 changes: 1 addition & 1 deletion scripts/add_metrics_to_readme.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ endLine=$(grep -n "^<!-- METRICS_TABLE_END -->" $FILE | awk -F: '{print $1}')

metricsTable=$(echo "| Name | Type | Description |
| ----------- | ----------- | ----------- |
$(getMetrics | sort)")
$(getMetrics | sort --version-sort)")

for ((i=0; i<${#LINES[@]}; i++)); do
if [ $i -eq $startLine ]; then
Expand Down
2 changes: 2 additions & 0 deletions scripts/snapshots/metric_names.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ logstash_stats_pipeline_queue_events_queue_size
logstash_stats_pipeline_queue_max_size_in_bytes
logstash_stats_pipeline_reloads_failures
logstash_stats_pipeline_reloads_successes
logstash_stats_pipeline_reloads_last_success_timestamp
logstash_stats_pipeline_reloads_last_failure_timestamp
logstash_stats_process_cpu_percent
logstash_stats_process_cpu_total_millis
logstash_stats_process_cpu_load_average_1m
Expand Down

0 comments on commit a4321ac

Please sign in to comment.