elastic · danielmitterdorfer · Jan 18, 2021 · Jan 18, 2021 · Jan 18, 2021 · gingerwizard
diff --git a/docs/command_line_reference.rst b/docs/command_line_reference.rst
@@ -327,10 +327,15 @@ You can use ``--include-tasks`` to specify a comma-separated list of tasks that
 
     Tasks will be executed in the order that are defined in the challenge, not in the order they are defined in the command.
 
+.. note::
+
+    Task filters are case-sensitive.
+
 **Examples**:
 
 * Execute only the tasks with the name ``index`` and ``term``: ``--include-tasks="index,term"``
 * Execute only tasks of type ``search``: ``--include-tasks="type:search"``
+* Execute only tasks that contain the tag ``read-op``: ``--include-tasks="tag:read-op"``
 * You can also mix and match: ``--include-tasks="index,type:search"``
 
 ``exclude-tasks``
@@ -344,7 +349,8 @@ You can use ``--exclude-tasks`` to specify a comma-separated list of tasks that
 
 * Skip any tasks with the name ``index`` and ``term``: ``--exclude-tasks="index,term"``
 * Skip any tasks of type ``search``: ``--exclude-tasks="type:search"``
-* You can also mix and match: ``--exclude-tasks="index,type:search"``
+* Skip any tasks that contain the tag ``setup``: ``--exclude-tasks="tag:setup"``
+* You can also mix and match: ``--exclude-tasks="index,type:search,tag:setup"``
 
 ``team-repository``
 ~~~~~~~~~~~~~~~~~~~

diff --git a/docs/track.rst b/docs/track.rst
@@ -410,6 +410,7 @@ schedule
 The ``schedule`` element contains a list of tasks that are executed by Rally, i.e. it describes the workload. Each task consists of the following properties:
 
 * ``name`` (optional): This property defines an explicit name for the given task. By default the operation's name is implicitly used as the task name but if the same operation is run multiple times, a unique task name must be specified using this property.
+* ``tags`` (optional): This property defines one or more tags for the given task, specified either as a single string or as a list of strings. This can be used for :ref:`task filtering <clr_include_tasks>`, e.g. with ``--exclude-tasks="tag:setup"`` all tasks except the ones that contain the tag ``setup`` are executed.
 * ``operation`` (mandatory): This property refers either to the name of an operation that has been defined in the ``operations`` section or directly defines an operation inline.
 * ``clients`` (optional, defaults to 1): The number of clients that should execute a task concurrently.
 * ``warmup-iterations`` (optional, defaults to 0): Number of iterations that each client should execute to warmup the benchmark candidate. Warmup iterations will not show up in the measurement results.

diff --git a/esrally/track/loader.py b/esrally/track/loader.py
@@ -810,6 +810,8 @@ def _filters_from_filtered_tasks(self, filtered_tasks):
                 elif len(spec) == 2:
                     if spec[0] == "type":
                         filters.append(track.TaskOpTypeFilter(spec[1]))
+                    elif spec[0] == "tag":
+                        filters.append(track.TaskTagFilter(spec[1]))
                     else:
                         raise exceptions.SystemSetupError(f"Invalid format for filtered tasks: [{t}]. "
                                                           f"Expected [type] but got [{spec[0]}].")
@@ -1458,6 +1460,7 @@ def parse_task(self, task_spec, ops, challenge_name, default_warmup_iterations=N
         task_name = self._r(task_spec, "name", error_ctx=op.name, mandatory=False, default_value=op.name)
         task = track.Task(name=task_name,
                           operation=op,
+                          tags=self._r(task_spec, "tags", error_ctx=op.name, mandatory=False),
                           meta_data=self._r(task_spec, "meta", error_ctx=op.name, mandatory=False),
                           warmup_iterations=self._r(task_spec, "warmup-iterations", error_ctx=op.name, mandatory=False,
                                                     default_value=default_warmup_iterations),

diff --git a/esrally/track/track.py b/esrally/track/track.py
@@ -704,7 +704,7 @@ def __eq__(self, other):
         return isinstance(other, type(self)) and self.name == other.name
 
     def __str__(self, *args, **kwargs):
-        return "filter for task name [%s]" % self.name
+        return f"filter for task name [{self.name}]"
 
 
 class TaskOpTypeFilter:
@@ -721,7 +721,24 @@ def __eq__(self, other):
         return isinstance(other, type(self)) and self.op_type == other.op_type
 
     def __str__(self, *args, **kwargs):
-        return "filter for operation type [%s]" % self.op_type
+        return f"filter for operation type [{self.op_type}]"
+
+
+class TaskTagFilter:
+    def __init__(self, tag_name):
+        self.tag_name = tag_name
+
+    def matches(self, task):
+        return self.tag_name in task.tags
+
+    def __hash__(self):
+        return hash(self.tag_name)
+
+    def __eq__(self, other):
+        return isinstance(other, type(self)) and self.tag_name == other.tag_name
+
+    def __str__(self, *args, **kwargs):
+        return f"filter for tasks tagged [{self.tag_name}]"
 
 
 class Singleton(type):
@@ -785,10 +802,16 @@ def __eq__(self, other):
 class Task:
     THROUGHPUT_PATTERN = re.compile(r"(?P<value>(\d*\.)?\d+)\s(?P<unit>\w+/s)")
 
-    def __init__(self, name, operation, meta_data=None, warmup_iterations=None, iterations=None, warmup_time_period=None,
-                 time_period=None, clients=1, completes_parent=False, schedule=None, params=None):
+    def __init__(self, name, operation, tags=None, meta_data=None, warmup_iterations=None, iterations=None,
+                 warmup_time_period=None, time_period=None, clients=1, completes_parent=False, schedule=None, params=None):
         self.name = name
         self.operation = operation
+        if isinstance(tags, str):
+            self.tags = [tags]
+        elif tags:
+            self.tags = tags
+        else:
+            self.tags = []
         self.meta_data = meta_data if meta_data else {}
         self.warmup_iterations = warmup_iterations
         self.iterations = iterations

diff --git a/tests/track/loader_test.py b/tests/track/loader_test.py
@@ -1409,25 +1409,53 @@ def test_filters_tasks(self):
                         },
                         {
                             "operation": "cluster-stats"
+                        },
+                        {
+                            "parallel": {
+                                "tasks": [
+                                    {
+                                        "name": "query-filtered",
+                                        "tags": "include-me",
+                                        "operation": "match-all",
+                                    },
+                                    {
+                                        "name": "index-4",
+                                        "tags": ["include-me", "bulk-task"],
+                                        "operation": "bulk-index",
+                                    },
+                                    {
+                                        "name": "index-5",
+                                        "operation": "bulk-index",
+                                    }
+                                ]
+                            }
+                        },
+                        {
+                            "name": "final-cluster-stats",
+                            "operation": "cluster-stats",
+                            "tags": "include-me"
                         }
                     ]
                 }
             ]
         }
         reader = loader.TrackSpecificationReader()
         full_track = reader("unittest", track_specification, "/mappings")
-        self.assertEqual(5, len(full_track.challenges[0].schedule))
+        self.assertEqual(7, len(full_track.challenges[0].schedule))
 
         filtered = self.filter(full_track, include_tasks=["index-3",
                                                           "type:search",
                                                           # Filtering should also work for non-core operation types.
-                                                          "type:custom-operation-type"])
+                                                          "type:custom-operation-type",
+                                                          "tag:include-me"])
 
         schedule = filtered.challenges[0].schedule
-        self.assertEqual(3, len(schedule))
+        self.assertEqual(5, len(schedule))
         self.assertEqual(["index-3", "match-all-parallel"], [t.name for t in schedule[0].tasks])
         self.assertEqual("match-all-serial", schedule[1].name)
         self.assertEqual("cluster-stats", schedule[2].name)
+        self.assertEqual(["query-filtered", "index-4"], [t.name for t in schedule[3].tasks])
+        self.assertEqual("final-cluster-stats", schedule[4].name)
 
     def test_filters_exclude_tasks(self):
         track_specification = {

diff --git a/tests/track/track_test.py b/tests/track/track_test.py
@@ -235,12 +235,14 @@ class TaskFilterTests(TestCase):
     def create_index_task(self):
         return track.Task("create-index-task",
                           track.Operation("create-index-op",
-                                          operation_type=track.OperationType.CreateIndex.to_hyphenated_string()))
+                                          operation_type=track.OperationType.CreateIndex.to_hyphenated_string()),
+                          tags=["write-op", "admin-op"])
 
     def search_task(self):
         return track.Task("search-task",
                           track.Operation("search-op",
-                                          operation_type=track.OperationType.Search.to_hyphenated_string()))
+                                          operation_type=track.OperationType.Search.to_hyphenated_string()),
+                          tags="read-op")
 
     def test_task_name_filter(self):
         f = track.TaskNameFilter("create-index-task")
@@ -252,6 +254,11 @@ def test_task_op_type_filter(self):
         self.assertTrue(f.matches(self.create_index_task()))
         self.assertFalse(f.matches(self.search_task()))
 
+    def test_task_tag_filter(self):
+        f = track.TaskTagFilter(tag_name="write-op")
+        self.assertTrue(f.matches(self.create_index_task()))
+        self.assertFalse(f.matches(self.search_task()))
+
 
 class TaskTests(TestCase):
     def task(self, schedule=None, target_throughput=None, target_interval=None):