Make track configuration more modular

With this commit we add a new `collect` helper macro to Rally's template loader. We also document how to use it. Closes #205
elastic · Feb 20, 2017 · 5bd7013 · 5bd7013
1 parent 09f4135
commit 5bd7013
Show file tree

Hide file tree

Showing 4 changed files with 164 additions and 4 deletions.
diff --git a/docs/adding_tracks.rst b/docs/adding_tracks.rst
@@ -184,6 +184,142 @@ The preparation is very easy and requires these two steps:
 
 You have to repeat these steps for all data files of your track.
 
+Structuring your track
+----------------------
+
+``track.json`` is just the entry point to a track, but you can split your track as you see fit. Suppose you want to add more challenges to the track above but you want to keep them in separate files. Let's start by storing our challenge in a separate file, e.g. in ``challenges/append-only.json``. Create the directory and store the following in ``append-only.json``::
+
+    {
+          "name": "append-only",
+          "description": "",
+          "default": true,
+          "index-settings": {
+            "index.number_of_replicas": 0
+          },
+          "schedule": [
+            {
+              "operation": "index",
+              "warmup-time-period": 120,
+              "clients": 8
+            },
+            {
+              "operation": "force-merge",
+              "clients": 1
+            },
+            {
+              "operation": "query-match-all",
+              "clients": 8,
+              "warmup-iterations": 1000,
+              "iterations": 1000,
+              "target-throughput": 100
+            }
+          ]
+        }
+
+Now modify ``track.json`` so it knows about your new file::
+
+
+    {
+      "short-description": "Standard benchmark in Rally (8.6M POIs from Geonames)",
+      "description": "This test indexes 8.6M documents (POIs from Geonames, total 2.8 GB json) using 8 client threads and 5000 docs per bulk request against Elasticsearch",
+      "data-url": "http://benchmarks.elasticsearch.org.s3.amazonaws.com/corpora/geonames",
+      "indices": [
+        {
+          "name": "geonames",
+          "types": [
+            {
+              "name": "type",
+              "mapping": "mappings.json",
+              "documents": "documents.json.bz2",
+              "document-count": 8647880,
+              "compressed-bytes": 197857614,
+              "uncompressed-bytes": 2790927196
+            }
+          ]
+        }
+      ],
+      "operations": [
+        {
+          "name": "index",
+          "operation-type": "index",
+          "bulk-size": 5000
+        },
+        {
+          "name": "force-merge",
+          "operation-type": "force-merge"
+        },
+        {
+          "name": "query-match-all",
+          "operation-type": "search",
+          "body": {
+            "query": {
+              "match_all": {}
+            }
+          }
+        }
+      ],
+      "challenges": [
+        {% include "challenges/append-only.json" %}
+      ]
+    }
+
+We replaced the challenge content with ``{% include "challenges/append-only.json" %}``, which tells Rally to include the challenge from the provided file. You can use ``include`` on arbitrary parts of your track.
+
+However, if your track consists of multiple challenges, it can be cumbersome to include them all explicitly. Therefore, Rally provides a ``collect`` helper that collects all related files for you. Let's adapt our track to use it::
+
+    {% import "rally.helpers" as rally %}
+    {
+      "short-description": "Standard benchmark in Rally (8.6M POIs from Geonames)",
+      "description": "This test indexes 8.6M documents (POIs from Geonames, total 2.8 GB json) using 8 client threads and 5000 docs per bulk request against Elasticsearch",
+      "data-url": "http://benchmarks.elasticsearch.org.s3.amazonaws.com/corpora/geonames",
+      "indices": [
+        {
+          "name": "geonames",
+          "types": [
+            {
+              "name": "type",
+              "mapping": "mappings.json",
+              "documents": "documents.json.bz2",
+              "document-count": 8647880,
+              "compressed-bytes": 197857614,
+              "uncompressed-bytes": 2790927196
+            }
+          ]
+        }
+      ],
+      "operations": [
+        {
+          "name": "index",
+          "operation-type": "index",
+          "bulk-size": 5000
+        },
+        {
+          "name": "force-merge",
+          "operation-type": "force-merge"
+        },
+        {
+          "name": "query-match-all",
+          "operation-type": "search",
+          "body": {
+            "query": {
+              "match_all": {}
+            }
+          }
+        }
+      ],
+      "challenges": [
+        {{ rally.collect(parts="challenges/*.json") }}
+      ]
+    }
+
+We changed two things here. First, we imported helper functions from Rally by adding ``{% import "rally.helpers" as rally %}`` in line 1. Second, we used Rally's ``collect`` helper to find and include all JSON files in the "challenges" subdirectory with the statement ``{{ rally.collect(parts="challenges/*.json") }}``. When you add new challenges in this directory, Rally will automatically pick them up.
+
+.. note::
+
+    If you want to inspect the final result, look at Rally's log file. Rally prints the fully rendered track there after it has loaded the track successfully.
+
+You've now mastered the basics of track development for Rally. It's time to pat yourself on the back before you dive into the advanced topics!
+
 How to contribute a track
 -------------------------
 

diff --git a/esrally/track/loader.py b/esrally/track/loader.py
@@ -3,6 +3,7 @@
 import logging
 import os
 import sys
+import glob
 import urllib.error
 
 import jinja2
@@ -277,17 +278,39 @@ def _update(self, distribution_version):
             raise exceptions.DataError("Cannot update track data in [%s]." % self.tracks_dir).with_traceback(tb)
 
 
-def render_template(loader, template_name, clock=time.Clock):
+def render_template(loader, base_path, template_name, clock=time.Clock):
+    def relative_glob(start, f):
+        result = glob.glob(os.path.join(start, f), recursive=False)
+        if result:
+            return [os.path.relpath(p, start) for p in result]
+        else:
+            return []
+
     env = jinja2.Environment(loader=loader)
     env.globals["now"] = clock.now()
+    env.globals["glob"] = lambda f: relative_glob(base_path, f)
     env.filters["days_ago"] = time.days_ago
     template = env.get_template(template_name)
 
     return template.render()
 
 
 def render_template_from_file(template_file_name):
-    return render_template(loader=jinja2.FileSystemLoader(io.dirname(template_file_name)), template_name=io.basename(template_file_name))
+    macros = """
+        {% macro collect(parts) -%}
+            {% set comma = joiner() %}
+            {% for part in glob(parts) %}
+                {{ comma() }}
+                {% include part %}
+            {% endfor %}
+        {%- endmacro %}
+    """
+
+    base_path = io.dirname(template_file_name)
+    # place helpers dict loader first to prevent users from overriding our macros.
+    return render_template(loader=jinja2.ChoiceLoader([jinja2.DictLoader({"rally.helpers": macros}), jinja2.FileSystemLoader(base_path)]),
+                           base_path=base_path,
+                           template_name=io.basename(template_file_name))
 
 
 def post_process_for_test_mode(t):

diff --git a/setup.py b/setup.py
@@ -25,7 +25,7 @@ def str_from_file(name):
     "py-cpuinfo==0.2.3",
     "tabulate==0.7.5",
     "jsonschema==2.5.1",
-    "Jinja2==2.8",
+    "Jinja2==2.9.5",
     # remote messaging
     "thespian==3.6.1",
     # recommended library for thespian to identify actors more easily with `ps`

diff --git a/tests/track/loader_test.py b/tests/track/loader_test.py
@@ -26,7 +26,8 @@ def test_render_template(self):
         }
         """
 
-        rendered = loader.render_template(loader=jinja2.DictLoader({"unittest": template}), template_name="unittest", clock=StaticClock)
+        rendered = loader.render_template(
+            loader=jinja2.DictLoader({"unittest": template}), base_path=".", template_name="unittest", clock=StaticClock)
 
         expected = """
         {