forked from ray-project/ray
-
Notifications
You must be signed in to change notification settings - Fork 0
/
dashboard_metrics.py
91 lines (80 loc) · 2.85 KB
/
dashboard_metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
from typing import Optional
from ray.dashboard.consts import COMPONENT_METRICS_TAG_KEYS
class NullMetric:
    """Mock metric class to be used in case of prometheus_client import error.

    Every method accepts arbitrary positional and keyword arguments and does
    nothing, so call sites can record metrics unconditionally.
    """

    def labels(self, *args, **kwargs):
        """Return this stub so ``metric.labels(...).inc()`` style chains work.

        prometheus_client metrics that are declared with label names must be
        narrowed through ``labels()`` before a value is recorded; the stub
        mirrors that call shape and simply hands itself back.
        """
        return self

    def set(self, *args, **kwargs):
        """Accept and discard a ``set`` call."""
        pass

    def observe(self, *args, **kwargs):
        """Accept and discard an ``observe`` call."""
        pass

    def inc(self, *args, **kwargs):
        """Accept and discard an ``inc`` call."""
        pass
# Only the import is guarded: it is the one statement here that can raise
# ImportError, and keeping the class bodies out of the ``try`` avoids masking
# unrelated import failures.
try:
    from prometheus_client import CollectorRegistry, Counter, Histogram, Gauge
except ImportError:

    class DashboardPrometheusMetrics(object):
        """No-op stand-in used when prometheus_client cannot be imported.

        Any attribute that is not found through normal lookup resolves to a
        fresh ``NullMetric``, so metric-recording call sites keep working.
        """

        def __init__(self, registry: Optional[object] = None):
            """Accept the same arguments as the prometheus-backed class.

            Args:
                registry: Ignored. Accepted only so that
                    ``DashboardPrometheusMetrics(registry=...)`` behaves the
                    same whether or not prometheus_client is installed.
            """

        def __getattr__(self, attr):
            # Called only when regular attribute lookup fails; every unknown
            # attribute is treated as a metric and answered with a no-op.
            return NullMetric()

else:
    # The metrics in this class should be kept in sync with
    # python/ray/tests/test_metrics_agent.py
    class DashboardPrometheusMetrics:
        """Prometheus metrics recorded by the dashboard.

        Attributes are created in ``__init__`` and registered on
        ``self.registry``:

        * ``metrics_request_duration`` - Histogram labelled by endpoint,
          http_status, SessionName and Component.
        * ``metrics_request_count`` - Counter labelled by method, endpoint,
          http_status, SessionName and Component.
        * ``metrics_dashboard_cpu`` - Gauge labelled by
          ``COMPONENT_METRICS_TAG_KEYS``.
        * ``metrics_dashboard_mem`` - Gauge labelled by
          ``COMPONENT_METRICS_TAG_KEYS``.
        """

        def __init__(self, registry: Optional[CollectorRegistry] = None):
            """Create the dashboard metrics on the given registry.

            Args:
                registry: Registry to attach the metrics to. When omitted
                    (or falsy) a new ``CollectorRegistry(auto_describe=True)``
                    is created and used instead.
            """
            self.registry: CollectorRegistry = registry or CollectorRegistry(
                auto_describe=True
            )
            # Buckets: 5ms, 10ms, 25ms, 50ms, 75ms
            #          100ms, 250ms, 500ms, 750ms
            #          1s, 2.5s, 5s, 7.5s, 10s
            #          20s, 40s, 60s
            # used for API duration
            histogram_buckets_s = [
                0.005,
                0.01,
                0.025,
                0.05,
                0.075,
                0.1,
                0.25,
                0.5,
                0.75,
                1,
                2.5,
                5,
                7.5,
                10,
                20,
                40,
                60,
            ]
            self.metrics_request_duration = Histogram(
                "dashboard_api_requests_duration_seconds",
                "Total duration in seconds per endpoint",
                ("endpoint", "http_status", "SessionName", "Component"),
                unit="seconds",
                namespace="ray",
                registry=self.registry,
                buckets=histogram_buckets_s,
            )
            self.metrics_request_count = Counter(
                "dashboard_api_requests_count",
                "Total requests count per endpoint",
                ("method", "endpoint", "http_status", "SessionName", "Component"),
                unit="requests",
                namespace="ray",
                registry=self.registry,
            )
            self.metrics_dashboard_cpu = Gauge(
                "component_cpu",
                "Dashboard CPU percentage usage.",
                tuple(COMPONENT_METRICS_TAG_KEYS),
                unit="percentage",
                namespace="ray",
                registry=self.registry,
            )
            self.metrics_dashboard_mem = Gauge(
                "component_uss",
                "USS usage of all components on the node.",
                tuple(COMPONENT_METRICS_TAG_KEYS),
                unit="mb",
                namespace="ray",
                registry=self.registry,
            )