Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Display ZGC collector stats in report #1502

Merged
merged 11 commits into from
Jun 2, 2022
4 changes: 4 additions & 0 deletions docs/metrics.rst
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,10 @@ Rally stores the following metrics:
* ``node_total_young_gen_gc_count``: The total number of young generation garbage collections across the whole cluster as reported by the node stats API.
* ``node_total_old_gen_gc_time``: The total runtime of the old generation garbage collector across the whole cluster as reported by the node stats API.
* ``node_total_old_gen_gc_count``: The total number of old generation garbage collections across the whole cluster as reported by the node stats API.
* ``node_total_zgc_cycles_gc_time``: The total time spent doing GC by the ZGC garbage collector across the whole cluster as reported by the node stats API.
* ``node_total_zgc_cycles_gc_count``: The total number of garbage collections performed by ZGC across the whole cluster as reported by the node stats API.
* ``node_total_zgc_pauses_gc_time``: The total time spent in Stop-The-World pauses by the ZGC garbage collector across the whole cluster as reported by the node stats API.
* ``node_total_zgc_pauses_gc_count``: The total number of Stop-The-World pauses performed by ZGC across the whole cluster as reported by the node stats API.
* ``segments_count``: Total number of segments as reported by the index stats API.
* ``segments_memory_in_bytes``: Number of bytes used for segments as reported by the index stats API.
* ``segments_doc_values_memory_in_bytes``: Number of bytes used for doc values as reported by the index stats API.
Expand Down
24 changes: 24 additions & 0 deletions docs/summary_report.rst
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,30 @@ Total Old Gen GC count
* **Definition**: The total number of old generation garbage collections across the whole cluster as reported by the node stats API.
* **Corresponding metrics key**: ``node_total_old_gen_gc_count``

Total ZGC Cycles GC time
------------------------

* **Definition**: The total time spent doing GC by the ZGC garbage collector across the whole cluster as reported by the node stats API.
* **Corresponding metrics key**: ``node_total_zgc_cycles_gc_time``

Total ZGC Cycles GC count
-------------------------

* **Definition**: The total number of garbage collections performed by ZGC across the whole cluster as reported by the node stats API.
* **Corresponding metrics key**: ``node_total_zgc_cycles_gc_count``

Total ZGC Pauses GC time
------------------------

* **Definition**: The total time spent in Stop-The-World pauses by the ZGC garbage collector across the whole cluster as reported by the node stats API.
* **Corresponding metrics key**: ``node_total_zgc_pauses_gc_time``

Total ZGC Pauses GC count
-------------------------

* **Definition**: The total number of Stop-The-World pauses performed by ZGC across the whole cluster as reported by the node stats API.
* **Corresponding metrics key**: ``node_total_zgc_pauses_gc_count``

Store size
----------

Expand Down
8 changes: 8 additions & 0 deletions esrally/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -1790,6 +1790,10 @@ def __call__(self):
result.young_gc_count = self.sum("node_total_young_gen_gc_count")
result.old_gc_time = self.sum("node_total_old_gen_gc_time")
result.old_gc_count = self.sum("node_total_old_gen_gc_count")
result.zgc_cycles_gc_time = self.sum("node_total_zgc_cycles_gc_time")
result.zgc_cycles_gc_count = self.sum("node_total_zgc_cycles_gc_count")
result.zgc_pauses_gc_time = self.sum("node_total_zgc_pauses_gc_time")
result.zgc_pauses_gc_count = self.sum("node_total_zgc_pauses_gc_count")

self.logger.debug("Gathering segment memory metrics.")
result.memory_segments = self.median("segments_memory_in_bytes")
Expand Down Expand Up @@ -1973,6 +1977,10 @@ def __init__(self, d=None):
self.young_gc_count = self.v(d, "young_gc_count")
self.old_gc_time = self.v(d, "old_gc_time")
self.old_gc_count = self.v(d, "old_gc_count")
self.zgc_cycles_gc_time = self.v(d, "zgc_cycles_gc_time")
self.zgc_cycles_gc_count = self.v(d, "zgc_cycles_gc_count")
self.zgc_pauses_gc_time = self.v(d, "zgc_pauses_gc_time")
self.zgc_pauses_gc_count = self.v(d, "zgc_pauses_gc_count")

self.memory_segments = self.v(d, "memory_segments")
self.memory_doc_values = self.v(d, "memory_doc_values")
Expand Down
57 changes: 29 additions & 28 deletions esrally/reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,10 @@ def _report_gc_metrics(self, stats):
self._line("Total Young Gen GC count", "", stats.young_gc_count, ""),
self._line("Total Old Gen GC time", "", stats.old_gc_time, "s", convert.ms_to_seconds),
self._line("Total Old Gen GC count", "", stats.old_gc_count, ""),
self._line("Total ZGC Cycles GC time", "", stats.zgc_cycles_gc_time, "s", convert.ms_to_seconds),
self._line("Total ZGC Cycles GC count", "", stats.zgc_cycles_gc_count, ""),
self._line("Total ZGC Pauses GC time", "", stats.zgc_pauses_gc_time, "s", convert.ms_to_seconds),
self._line("Total ZGC Pauses GC count", "", stats.zgc_pauses_gc_count, ""),
)

def _report_disk_usage(self, stats):
Expand Down Expand Up @@ -849,41 +853,38 @@ def _report_total_count(self, name, baseline_total, contender_total):
)

def _report_gc_metrics(self, baseline_stats, contender_stats):
return self._join(
self._line(
"Total Young Gen GC time",
baseline_stats.young_gc_time,
contender_stats.young_gc_time,
"",
"s",
treat_increase_as_improvement=False,
formatter=convert.ms_to_seconds,
),
self._line(
"Total Young Gen GC count",
baseline_stats.young_gc_count,
contender_stats.young_gc_count,
"",
"",
treat_increase_as_improvement=False,
),
self._line(
"Total Old Gen GC time",
baseline_stats.old_gc_time,
contender_stats.old_gc_time,
line_method = self._line

def _time_metric(metric_prefix, description):
return line_method(
f"Total {description} GC time",
getattr(baseline_stats, f"{metric_prefix}_gc_time"),
getattr(contender_stats, f"{metric_prefix}_gc_time"),
"",
"s",
treat_increase_as_improvement=False,
formatter=convert.ms_to_seconds,
),
self._line(
"Total Old Gen GC count",
baseline_stats.old_gc_count,
contender_stats.old_gc_count,
)

def _count_metric(metric_prefix, description):
return line_method(
f"Total {description} GC count",
getattr(baseline_stats, f"{metric_prefix}_gc_count"),
getattr(contender_stats, f"{metric_prefix}_gc_count"),
"",
"",
treat_increase_as_improvement=False,
),
)

return self._join(
_time_metric("young", "Young Gen"),
_count_metric("young", "Young Gen"),
_time_metric("old", "Old Gen"),
_count_metric("old", "Old Gen"),
_time_metric("zgc_cycles", "ZGC Cycles"),
_count_metric("zgc_cycles", "ZGC Cycles"),
_time_metric("zgc_pauses", "ZGC Pauses"),
_count_metric("zgc_pauses", "ZGC Pauses"),
)

def _report_disk_usage(self, baseline_stats, contender_stats):
Expand Down
45 changes: 45 additions & 0 deletions tests/metrics_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1690,6 +1690,12 @@ def test_calculate_global_stats(self):
store.put_value_cluster_level("latency", 220, unit="ms", task="index #1", operation_type=track.OperationType.Bulk)
store.put_value_cluster_level("latency", 225, unit="ms", task="index #1", operation_type=track.OperationType.Bulk)

for collector in ("young_gen", "old_gen", "zgc_cycles", "zgc_pauses"):
store.put_value_node_level("rally-node-0", f"node_{collector}_gc_time", 100, "ms")
store.put_value_node_level("rally-node-0", f"node_{collector}_gc_count", 1)
store.put_value_cluster_level(f"node_total_{collector}_gc_time", 100, "ms")
store.put_value_cluster_level(f"node_total_{collector}_gc_count", 1)

store.put_value_cluster_level(
"service_time",
250,
Expand Down Expand Up @@ -1811,6 +1817,15 @@ def test_calculate_global_stats(self):
]
assert opm2["duration"] == 600 * 1000

assert stats.young_gc_time == 100
assert stats.young_gc_count == 1
assert stats.old_gc_time == 100
assert stats.old_gc_count == 1
assert stats.zgc_cycles_gc_time == 100
assert stats.zgc_cycles_gc_count == 1
assert stats.zgc_pauses_gc_time == 100
assert stats.zgc_pauses_gc_count == 1

def test_calculate_system_stats(self):
cfg = config.Config()
cfg.add(config.Scope.application, "system", "env.name", "unittest")
Expand Down Expand Up @@ -1982,6 +1997,10 @@ def test_as_flat_list(self):
"young_gc_count": 7,
"old_gc_time": 0,
"old_gc_count": 0,
"zgc_cycles_gc_time": 100,
"zgc_cycles_gc_count": 1,
"zgc_pauses_gc_time": 50,
"zgc_pauses_gc_count": 1,
"merge_time": 3702,
"merge_time_per_shard": {
"min": 40,
Expand Down Expand Up @@ -2153,6 +2172,32 @@ def test_as_flat_list(self):
},
}

assert select(metric_list, "zgc_cycles_gc_time") == {
"name": "zgc_cycles_gc_time",
"value": {
"single": 100,
},
}
assert select(metric_list, "zgc_cycles_gc_count") == {
"name": "zgc_cycles_gc_count",
"value": {
"single": 1,
},
}

assert select(metric_list, "zgc_pauses_gc_time") == {
"name": "zgc_pauses_gc_time",
"value": {
"single": 50,
},
}
assert select(metric_list, "zgc_pauses_gc_count") == {
"name": "zgc_pauses_gc_count",
"value": {
"single": 1,
},
}

assert select(metric_list, "merge_time") == {
"name": "merge_time",
"value": {
Expand Down
39 changes: 39 additions & 0 deletions tests/reporter_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from unittest import mock

from esrally import config, reporter
from esrally.metrics import GlobalStats
from esrally.utils import convert


Expand Down Expand Up @@ -77,3 +78,41 @@ def test_diff_percent_divide_by_zero(self):
def test_diff_percent_ignore_formatter(self):
formatted = self.reporter._diff(1, 0, False, formatter=convert.factor(100.0), as_percentage=True)
assert formatted == "-100.00%"

def test_report_gc_metrics(self):
r1 = GlobalStats(
{
"young_gc_time": 100,
"young_gc_count": 1,
"old_gc_time": 200,
"old_gc_count": 1,
"zgc_cycles_gc_time": 300,
"zgc_cycles_gc_count": 1,
"zgc_pauses_gc_time": 400,
"zgc_pauses_gc_count": 1,
}
)

r2 = GlobalStats(
{
"young_gc_time": 200,
"young_gc_count": 2,
"old_gc_time": 300,
"old_gc_count": 2,
"zgc_cycles_gc_time": 400,
"zgc_cycles_gc_count": 2,
"zgc_pauses_gc_time": 500,
"zgc_pauses_gc_count": 2,
}
)
metrics = self.reporter._report_gc_metrics(r1, r2)
assert metrics == [
["Total Young Gen GC time", "", 0.1, 0.2, "+0.10000", "s", "+100.00%"],
["Total Young Gen GC count", "", 1, 2, "+1.00000", "", "+100.00%"],
["Total Old Gen GC time", "", 0.2, 0.3, "+0.10000", "s", "+50.00%"],
["Total Old Gen GC count", "", 1, 2, "+1.00000", "", "+100.00%"],
["Total ZGC Cycles GC time", "", 0.3, 0.4, "+0.10000", "s", "+33.33%"],
["Total ZGC Cycles GC count", "", 1, 2, "+1.00000", "", "+100.00%"],
["Total ZGC Pauses GC time", "", 0.4, 0.5, "+0.10000", "s", "+25.00%"],
["Total ZGC Pauses GC count", "", 1, 2, "+1.00000", "", "+100.00%"],
]