diff --git a/external/elasticsearch/README.md b/external/elasticsearch/README.md index a8cfe1a92..44b14e8c2 100644 --- a/external/elasticsearch/README.md +++ b/external/elasticsearch/README.md @@ -1,70 +1,6 @@ storm-crawler-elasticsearch =========================== -A collection of resources for [Elasticsearch](https://www.elastic.co/products/elasticsearch): -* [IndexerBolt](https://github.org/apache/incubator-stormcrawler/blob/master/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/bolt/IndexerBolt.java) for indexing documents crawled with StormCrawler -* [Spouts](https://github.org/apache/incubator-stormcrawler/blob/master/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/persistence/AggregationSpout.java) and [StatusUpdaterBolt](https://github.org/apache/incubator-stormcrawler/blob/master/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/persistence/StatusUpdaterBolt.java) for persisting URL information in recursive crawls -* [MetricsConsumer](https://github.org/apache/incubator-stormcrawler/blob/master/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/metrics/MetricsConsumer.java) -* [StatusMetricsBolt](https://github.org/apache/incubator-stormcrawler/blob/master/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/metrics/StatusMetricsBolt.java) for sending the breakdown of URLs per status as metrics and display its evolution over time. - -as well as an archetype containing a basic crawl topology and its configuration. - -We also have resources for [Kibana](https://www.elastic.co/products/kibana) to build basic real-time monitoring dashboards for the crawls. A dashboard for [Grafana](http://grafana.com/) is also [available](https://grafana.com/dashboards/2363). - -Getting started ---------------------- - -Use the archetype for Elasticsearch with: - -`mvn archetype:generate -DarchetypeGroupId=org.apache.stormcrawler -DarchetypeArtifactId=storm-crawler-elasticsearch-archetype -DarchetypeVersion=2.11` - -You'll be asked to enter a groupId (e.g. com.mycompany.crawler), an artefactId (e.g. stormcrawler), a version, a package name and details about the user agent to use. - -This will not only create a fully formed project containing a POM with the dependency above but also a set of resources, configuration files and a topology class. Enter the directory you just created (should be the same as the artefactId you specified earlier) and follow the instructions on the README file. - -Video tutorial ---------------------- - -[![Video tutorial](https://i.ytimg.com/vi/8kpJLPdhvLw/hqdefault.jpg)](https://youtu.be/8kpJLPdhvLw) - - -Kibana ---------------------- - -To import the dashboards into a local instance of Kibana, go into the folder _kibana_ and run the script _importKibana.sh_. - -You should see something like - -``` -Importing status dashboard into Kibana -{"success":true,"successCount":4} -Importing metrics dashboard into Kibana -{"success":true,"successCount":9} -``` - -The [dashboard screen](http://localhost:5601/app/kibana#/dashboards) should show both the status and metrics dashboards. If you click on `Crawl Status`, you should see 2 tables containing the count of URLs per status and the top hostnames per URL count. -The [Metrics dashboard](http://localhost:5601/app/kibana#/dashboard/Crawl-metrics) can be used to monitor the progress of the crawl. - -The file _storm.ndjson_ is used to display some of Storm's internal metrics and is not added by default. 
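For reference, the import script ships that Storm dashboard import as a commented-out command; a minimal sketch of loading it by hand, assuming Kibana is running on its default local port and the command is run from the project root:

```sh
# Optional: load the Storm internal-metrics dashboard into a local Kibana instance
curl -X POST "localhost:5601/api/saved_objects/_import" \
     -H "kbn-xsrf: true" \
     --form file=@kibana/storm.ndjson
```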
- -#### Per time period metric indices (optional) - -The _metrics_ index can be configured per time period. This best practice is [discussed on the Elastic website](https://www.elastic.co/guide/en/elasticsearch/guide/current/time-based.html). - -The crawler config YAML must be updated to use an optional argument as shown below to have one index per day: - -``` - #Metrics consumers: - topology.metrics.consumer.register: - - class: "org.apache.stormcrawler.elasticsearch.metrics.MetricsConsumer" - parallelism.hint: 1 - argument: "yyyy-MM-dd" -``` - - - - - - - - +Recent Elasticsearch releases are published under the `Elastic License 2.0`, which is not compatible with the Apache License 2.0. +The corresponding module has therefore been removed. Users can switch to the `opensearch` module and follow the +[OpenSearch upgrade guide](https://opensearch.org/docs/latest/upgrade-to/upgrade-to/) to migrate existing Elasticsearch installations.
diff --git a/external/elasticsearch/archetype/pom.xml b/external/elasticsearch/archetype/pom.xml deleted file mode 100644 index 499173ee7..000000000 --- a/external/elasticsearch/archetype/pom.xml +++ /dev/null @@ -1,72 +0,0 @@ - - - - - - 4.0.0 - - - org.apache.stormcrawler - storm-crawler - 2.12-SNAPSHOT - ../../../pom.xml - - - storm-crawler-elasticsearch-archetype - - maven-archetype - - - - - - src/main/resources - true - - META-INF/maven/archetype-metadata.xml - - - - src/main/resources - false - - META-INF/maven/archetype-metadata.xml - - - - - - - org.apache.maven.archetype - archetype-packaging - 3.1.2 - - - - - - - maven-archetype-plugin - 3.2.1 - - - - -
diff --git a/external/elasticsearch/archetype/src/main/resources/META-INF/archetype-post-generate.groovy b/external/elasticsearch/archetype/src/main/resources/META-INF/archetype-post-generate.groovy deleted file mode 100644 index 499d22941..000000000 --- a/external/elasticsearch/archetype/src/main/resources/META-INF/archetype-post-generate.groovy +++ /dev/null @@ -1,5 +0,0 @@ -def file = new File(request.getOutputDirectory(), request.getArtifactId() + "/ES_IndexInit.sh") -file.setExecutable(true, false) - -def file2 = new File(request.getOutputDirectory(), request.getArtifactId() + "/kibana/importKibana.sh") -file2.setExecutable(true, false)
diff --git a/external/elasticsearch/archetype/src/main/resources/META-INF/maven/archetype-metadata.xml b/external/elasticsearch/archetype/src/main/resources/META-INF/maven/archetype-metadata.xml deleted file mode 100644 index c3a0c6d2f..000000000 --- a/external/elasticsearch/archetype/src/main/resources/META-INF/maven/archetype-metadata.xml +++ /dev/null @@ -1,77 +0,0 @@ - - - - - - - - - ^[a-zA-Z_\-]+$ - - - - - - ^\S+@\S+\.\S+$ - - - ${project.version} - - - - - - src/main/java - - **/*.java - - - - src/main/resources - - **/*.xml - **/*.txt - **/*.yaml - **/*.json - - - - - - README.md - ES_IndexInit.sh - *.flux - *.yaml - - - - kibana - - *.sh - *.ndjson - - - - -
diff --git a/external/elasticsearch/archetype/src/main/resources/archetype-resources/ES_IndexInit.sh b/external/elasticsearch/archetype/src/main/resources/archetype-resources/ES_IndexInit.sh deleted file mode 100755 index 4b6a75ca3..000000000 --- a/external/elasticsearch/archetype/src/main/resources/archetype-resources/ES_IndexInit.sh +++ /dev/null @@ -1,173 +0,0 @@ -ESHOST=${1:-"http://localhost:9200"} -ESCREDENTIALS=${2:-"-u elastic:passwordhere"} - -# deletes and recreates a status index with a bespoke schema - -curl $ESCREDENTIALS -s -XDELETE "$ESHOST/status/" > /dev/null - -echo "Deleted status index"
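As a rough illustration of the migration path mentioned in the replacement README above (a sketch only, with placeholder hosts and ports): once an OpenSearch cluster is available, an existing Elasticsearch index such as `status` can be copied over with the reindex-from-remote API, provided the source cluster is listed in the `reindex.remote.whitelist` setting of the OpenSearch nodes.

```sh
# Sketch: copy the 'status' index from an old Elasticsearch cluster (assumed on :9200)
# into an OpenSearch cluster (assumed on :9201)
curl -s -XPOST "http://localhost:9201/_reindex" -H 'Content-Type: application/json' -d '
{
  "source": {
    "remote": { "host": "http://localhost:9200" },
    "index": "status"
  },
  "dest": { "index": "status" }
}'
```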
- -# http://localhost:9200/status/_mapping/status?pretty - -echo "Creating status index with mapping" - -curl $ESCREDENTIALS -s -XPUT $ESHOST/status -H 'Content-Type: application/json' -d ' -{ - "settings": { - "index": { - "number_of_shards": 10, - "number_of_replicas": 1, - "refresh_interval": "5s" - } - }, - "mappings": { - "dynamic_templates": [{ - "metadata": { - "path_match": "metadata.*", - "match_mapping_type": "string", - "mapping": { - "type": "keyword" - } - } - }], - "_source": { - "enabled": true - }, - "properties": { - "key": { - "type": "keyword", - "index": true - }, - "nextFetchDate": { - "type": "date", - "format": "date_optional_time" - }, - "status": { - "type": "keyword" - }, - "url": { - "type": "keyword" - } - } - } -}' - -# deletes and recreates a status index with a bespoke schema - -curl $ESCREDENTIALS -s -XDELETE "$ESHOST/metrics*/" > /dev/null - -echo "" -echo "Deleted metrics index" - -curl $ESCREDENTIALS -s -XPUT $ESHOST/_ilm/policy/7d-deletion_policy -H 'Content-Type:application/json' -d ' -{ - "policy": { - "phases": { - "delete": { - "min_age": "7d", - "actions": { - "delete": {} - } - } - } - } -} -' - -echo "Creating metrics index with mapping" - -# http://localhost:9200/metrics/_mapping/status?pretty -curl $ESCREDENTIALS -s -XPOST $ESHOST/_template/storm-metrics-template -H 'Content-Type: application/json' -d ' -{ - "index_patterns": "metrics*", - "settings": { - "index": { - "number_of_shards": 1, - "refresh_interval": "30s" - }, - "number_of_replicas": 0, - "lifecycle.name": "7d-deletion_policy" - }, - "mappings": { - "_source": { "enabled": true }, - "properties": { - "name": { - "type": "keyword" - }, - "stormId": { - "type": "keyword" - }, - "srcComponentId": { - "type": "keyword" - }, - "srcTaskId": { - "type": "short" - }, - "srcWorkerHost": { - "type": "keyword" - }, - "srcWorkerPort": { - "type": "integer" - }, - "timestamp": { - "type": "date", - "format": "date_optional_time" - }, - "value": { - "type": "double" - } - } - } -}' - -# deletes and recreates a doc index with a bespoke schema - -curl $ESCREDENTIALS -s -XDELETE "$ESHOST/content*/" > /dev/null - -echo "" -echo "Deleted content index" - -echo "Creating content index with mapping" - -curl $ESCREDENTIALS -s -XPUT $ESHOST/content -H 'Content-Type: application/json' -d ' -{ - "settings": { - "index": { - "number_of_shards": 5, - "number_of_replicas": 1, - "refresh_interval": "60s" - } - }, - "mappings": { - "_source": { - "enabled": true - }, - "properties": { - "content": { - "type": "text" - }, - "description": { - "type": "text" - }, - "domain": { - "type": "keyword" - }, - "format": { - "type": "keyword" - }, - "keywords": { - "type": "keyword" - }, - "host": { - "type": "keyword" - }, - "title": { - "type": "text" - }, - "url": { - "type": "keyword" - } - } - } -}' - diff --git a/external/elasticsearch/archetype/src/main/resources/archetype-resources/README.md b/external/elasticsearch/archetype/src/main/resources/archetype-resources/README.md deleted file mode 100644 index 27e9183e2..000000000 --- a/external/elasticsearch/archetype/src/main/resources/archetype-resources/README.md +++ /dev/null @@ -1,62 +0,0 @@ -This has been generated by the StormCrawler Maven Archetype as a starting point for building your own crawler with Elasticsearch as a backend. -Have a look at the code and resources and modify them to your heart's content. 
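After running _ES_IndexInit.sh_, a quick way to check that the `status` and `content` indices were created is to list them, using the same default endpoint and credentials as the script:

```sh
# list the indices created by ES_IndexInit.sh
curl -u elastic:passwordhere -s "http://localhost:9200/_cat/indices?v"
```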
- -First generate an uberjar: - -``` sh -mvn clean package -``` - -then with Elasticsearch running locally, run `./ES_IndexInit.sh` to define the indices used by StormCrawler. - -The first step consists in creating a file _seeds.txt_ in the current directory and populating it with the URLs -to be used as a starting point for the crawl, e.g. - -`echo "http://stormcrawler.net/" > seeds.txt` - -You can start the crawl topology using the Java class - -``` sh -storm local target/${artifactId}-${version}.jar --local-ttl 3600 ${package}.ESCrawlTopology -- -conf crawler-conf.yaml -conf es-conf.yaml . seeds.txt -``` - -This will run the topology in local mode for 1 hour, using the URLs in _seeds.txt_ as a starting point. To start the topology in distributed mode, where it will run indefinitely, launch it with 'storm jar'. - -Alternatively, you can also use Flux to do the same but the injection is separated from the crawl: - -``` sh - -storm local target/${artifactId}-${version}.jar org.apache.storm.flux.Flux es-injection.flux --local-ttl 3600 - -storm local target/${artifactId}-${version}.jar org.apache.storm.flux.Flux es-crawler.flux --local-ttl 3600 -``` - -Note that in local mode, Flux uses a default TTL for the topology of 20 secs. The command above runs the topology for 1 hour. - -It is best to run the topology with `storm jar` to benefit from the Storm UI and logging. In that case, the topology runs continuously, as intended. - -Kibana ---------------------- - -To import the dashboards into a local instance of Kibana, go into the folder _kibana_ and run the script _importKibana.sh_. - -You should see something like - -``` -Importing status dashboard into Kibana -{"success":true,"successCount":4} -Importing metrics dashboard into Kibana -{"success":true,"successCount":9} -``` - -The [dashboard screen](http://localhost:5601/app/kibana#/dashboards) should show both the status and metrics dashboards. If you click on `Crawl Status`, you should see 2 tables containing the count of URLs per status and the top hostnames per URL count. -The [Metrics dashboard](http://localhost:5601/app/kibana#/dashboard/Crawl-metrics) can be used to monitor the progress of the crawl. - -The file _storm.ndjson_ is used to display some of Storm's internal metrics and is not added by default. - - - -Happy crawling! If you have any questions, please ask on [StackOverflow with the tag stormcrawler](http://stackoverflow.com/questions/tagged/stormcrawler). - - - diff --git a/external/elasticsearch/archetype/src/main/resources/archetype-resources/crawler-conf.yaml b/external/elasticsearch/archetype/src/main/resources/archetype-resources/crawler-conf.yaml deleted file mode 100644 index d21929e71..000000000 --- a/external/elasticsearch/archetype/src/main/resources/archetype-resources/crawler-conf.yaml +++ /dev/null @@ -1,145 +0,0 @@ -# Custom configuration for StormCrawler -# This is used to override the default values from crawler-default.xml and provide additional ones -# for your custom components. -# Use this file with the parameter -conf when launching your extension of ConfigurableTopology. -# This file does not contain all the key values but only the most frequently used ones. See crawler-default.xml for an extensive list. 
- -config: - topology.workers: 1 - topology.message.timeout.secs: 300 - topology.max.spout.pending: 100 - topology.debug: false - - fetcher.threads.number: 50 - - # override the JVM parameters for the workers - topology.worker.childopts: "-Xmx2g -Djava.net.preferIPv4Stack=true" - - # mandatory when using Flux - topology.kryo.register: - - org.apache.stormcrawler.Metadata - - org.apache.stormcrawler.persistence.Status - - # Lists the metadata to transfer to outlinks - # Used by Fetcher and SiteMapParser for redirections, - # discovered links, passing cookies to child pages, etc. - # These are also persisted for the parent document (see below). - # Allows wildcards, eg. "follow.*" transfers all metadata starting with "follow.". - # metadata.transfer: - # - customMetadataName - - # Lists the metadata to persist to storage - # These are not transferred to the outlinks. Also allows wildcards, eg. "follow.*". - metadata.persist: - - _redirTo - - error.cause - - error.source - - isSitemap - - isFeed - - # Agent name info - given here as an example. Do not be an anonynmous coward, use your real information! - # The full user agent value sent as part of the HTTP requests - # is built from the elements below. Only the agent.name is mandatory, - # it is also used to parse the robots.txt directives. - - # The agent name must be compliant with RFC 9309 (section 2.2.1) - # i.e. it MUST contain only uppercase and lowercase letters ("a-z" and "A-Z), underscores ("_"), and hyphens ("-") - http.agent.name: "${http-agent-name}" - # version of your crawler - http.agent.version: "${http-agent-version}" - # description of what it does - http.agent.description: "${http-agent-description}" - # URL webmasters can go to to learn about it - http.agent.url: "${http-agent-url}" - # Finally, an email so that they can get in touch with you - http.agent.email: "${http-agent-email}" - - http.protocol.implementation: "org.apache.stormcrawler.protocol.okhttp.HttpProtocol" - https.protocol.implementation: "org.apache.stormcrawler.protocol.okhttp.HttpProtocol" - - # The maximum number of bytes for returned HTTP response bodies. - # The fetched page will be trimmed to 65KB in this case - # Set -1 to disable the limit. 
- http.content.limit: 65536 - - sitemap.discovery: true - - # FetcherBolt queue dump => comment out to activate - # if a file exists on the worker machine with the corresponding port number - # the FetcherBolt will log the content of its internal queues to the logs - # fetcherbolt.queue.debug.filepath: "/tmp/fetcher-dump-{port}" - - parsefilters.config.file: "parsefilters.json" - urlfilters.config.file: "urlfilters.json" - jsoup.filters.config.file: "jsoupfilters.json" - - # revisit a page daily (value in minutes) - # set it to -1 to never refetch a page - fetchInterval.default: 1440 - - # revisit a page with a fetch error after 2 hours (value in minutes) - # set it to -1 to never refetch a page - fetchInterval.fetch.error: 120 - - # never revisit a page with an error (or set a value in minutes) - fetchInterval.error: -1 - - # set to true if you don't need any text to be extracted by JSoup - textextractor.no.text: false - - # text extraction for JSoupParserBolt - textextractor.include.pattern: - - DIV[id="maincontent"] - - DIV[itemprop="articleBody"] - - ARTICLE - - textextractor.exclude.tags: - - STYLE - - SCRIPT - - # needed for parsing with Tika - jsoup.treat.non.html.as.error: false - - # restricts the documents types to be parsed with Tika - parser.mimetype.whitelist: - - application/.+word.* - - application/.+excel.* - - application/.+powerpoint.* - - application/.*pdf.* - - # Tika parser configuration file - parse.tika.config.file: "tika-config.xml" - - # custom fetch interval to be used when a document has the key/value in its metadata - # and has been fetched successfully (value in minutes) - # fetchInterval.FETCH_ERROR.isFeed=true: 30 - # fetchInterval.isFeed=true: 10 - - # configuration for the classes extending AbstractIndexerBolt - # indexer.md.filter: "someKey=aValue" - indexer.url.fieldname: "url" - indexer.text.fieldname: "content" - indexer.canonical.name: "canonical" - # How to convert metadata key values into fields for indexing - # - # if no alias is specified with =alias, the key value is used - # for instance below, _domain_ and _format_ will be used - # as field names, whereas _title_ will be used for _parse.title_. - # You can specify the index of the value to store from the values array - # by using the _key[index]_ format, e.g. _parse.title[0]_ would try to - # get the first value for the metadata _parse.title_ (which is the default anyway). - # Finally, you can use a glob (*) to match all the keys, e.g. _parse.*_ would - # index all the keys with _parse_ as a prefix. Note that in that case, you can't - # specify an alias with =, nor can you specify an index. 
- indexer.md.mapping: - - parse.title=title - - parse.keywords=keywords - - parse.description=description - - domain - - format - - # Metrics consumers: - topology.metrics.consumer.register: - - class: "org.apache.storm.metric.LoggingMetricsConsumer" - parallelism.hint: 1 - diff --git a/external/elasticsearch/archetype/src/main/resources/archetype-resources/es-conf.yaml b/external/elasticsearch/archetype/src/main/resources/archetype-resources/es-conf.yaml deleted file mode 100644 index 848173f5f..000000000 --- a/external/elasticsearch/archetype/src/main/resources/archetype-resources/es-conf.yaml +++ /dev/null @@ -1,98 +0,0 @@ -# configuration for Elasticsearch resources - -config: - # ES indexer bolt - # addresses can be specified as a full URL - # if not we assume that the protocol is http and the port 9200 - es.indexer.addresses: "localhost" - es.indexer.index.name: "content" - # es.indexer.pipeline: "_PIPELINE_" - es.indexer.create: false - es.indexer.bulkActions: 100 - es.indexer.flushInterval: "2s" - es.indexer.concurrentRequests: 1 - - # allows to use the Rest client on ES8+ - es.indexer.compatibility.mode: false - - # ES metricsConsumer - es.metrics.addresses: "http://localhost:9200" - es.metrics.index.name: "metrics" - - # allows to use the Rest client on ES8+ - es.metrics.compatibility.mode: false - - # ES spout and persistence bolt - es.status.addresses: "http://localhost:9200" - es.status.index.name: "status" - #es.status.user: "USERNAME" - #es.status.password: "PASSWORD" - # the routing is done on the value of 'partition.url.mode' - es.status.routing: true - # stores the value used for grouping the URLs as a separate field - # needed by the spout implementations - # also used for routing if the value above is set to true - es.status.routing.fieldname: "key" - es.status.bulkActions: 500 - es.status.flushInterval: "5s" - es.status.concurrentRequests: 1 - - # allows to use the Rest client on ES8+ - es.status.compatibility.mode: false - - ################ - # spout config # - ################ - - # positive or negative filters parsable by the Lucene Query Parser - # es.status.filterQuery: - # - "-(key:stormcrawler.net)" - # - "-(key:digitalpebble.com)" - - # time in secs for which the URLs will be considered for fetching after a ack or fail - spout.ttl.purgatory: 30 - - # Min time (in msecs) to allow between 2 successive queries to ES - spout.min.delay.queries: 2000 - - # Max time (in msecs) to allow between 2 successive queries to ES - spout.max.delay.queries: 20000 - - # Delay since previous query date (in secs) after which the nextFetchDate value will be reset to the current time - # Setting this to -1 or a large value means that the ES will cache the results but also that less and less results - # might be returned. 
- spout.reset.fetchdate.after: 120 - - es.status.max.buckets: 50 - es.status.max.urls.per.bucket: 2 - # field to group the URLs into buckets - es.status.bucket.field: "key" - # fields to sort the URLs within a bucket - es.status.bucket.sort.field: - - "nextFetchDate" - - "url" - # field to sort the buckets - es.status.global.sort.field: "nextFetchDate" - - # CollapsingSpout : limits the deep paging by resetting the start offset for the ES query - es.status.max.start.offset: 500 - - # AggregationSpout : sampling improves the performance on large crawls - es.status.sample: false - - # max allowed duration of a query in sec - es.status.query.timeout: -1 - - # AggregationSpout (expert): adds this value in mins to the latest date returned in the results and - # use it as nextFetchDate - es.status.recentDate.increase: -1 - es.status.recentDate.min.gap: -1 - - topology.metrics.consumer.register: - - class: "org.apache.stormcrawler.elasticsearch.metrics.MetricsConsumer" - parallelism.hint: 1 - #whitelist: - # - "fetcher_counter" - # - "fetcher_average.bytes_fetched" - #blacklist: - # - "__receive.*" diff --git a/external/elasticsearch/archetype/src/main/resources/archetype-resources/es-crawler.flux b/external/elasticsearch/archetype/src/main/resources/archetype-resources/es-crawler.flux deleted file mode 100644 index 92d596725..000000000 --- a/external/elasticsearch/archetype/src/main/resources/archetype-resources/es-crawler.flux +++ /dev/null @@ -1,141 +0,0 @@ -name: "crawler" - -includes: - - resource: true - file: "/crawler-default.yaml" - override: false - - - resource: false - file: "crawler-conf.yaml" - override: true - - - resource: false - file: "es-conf.yaml" - override: true - -spouts: - - id: "spout" - className: "org.apache.stormcrawler.elasticsearch.persistence.AggregationSpout" - parallelism: 10 - -bolts: - - id: "partitioner" - className: "org.apache.stormcrawler.bolt.URLPartitionerBolt" - parallelism: 1 - - id: "fetcher" - className: "org.apache.stormcrawler.bolt.FetcherBolt" - parallelism: 1 - - id: "sitemap" - className: "org.apache.stormcrawler.bolt.SiteMapParserBolt" - parallelism: 1 - - id: "parse" - className: "org.apache.stormcrawler.bolt.JSoupParserBolt" - parallelism: 1 - - id: "shunt" - className: "org.apache.stormcrawler.tika.RedirectionBolt" - parallelism: 1 - - id: "tika" - className: "org.apache.stormcrawler.tika.ParserBolt" - parallelism: 1 - - id: "index" - className: "org.apache.stormcrawler.elasticsearch.bolt.IndexerBolt" - parallelism: 1 - - id: "status" - className: "org.apache.stormcrawler.elasticsearch.persistence.StatusUpdaterBolt" - parallelism: 1 - - id: "deleter" - className: "org.apache.stormcrawler.elasticsearch.bolt.DeletionBolt" - parallelism: 1 - - id: "status_metrics" - className: "org.apache.stormcrawler.elasticsearch.metrics.StatusMetricsBolt" - parallelism: 1 - -streams: - - from: "spout" - to: "partitioner" - grouping: - type: SHUFFLE - - - from: "__system" - to: "status_metrics" - grouping: - type: SHUFFLE - streamId: "__tick" - - - from: "partitioner" - to: "fetcher" - grouping: - type: FIELDS - args: ["key"] - - - from: "fetcher" - to: "sitemap" - grouping: - type: LOCAL_OR_SHUFFLE - - - from: "sitemap" - to: "parse" - grouping: - type: LOCAL_OR_SHUFFLE - - - from: "parse" - to: "shunt" - grouping: - type: LOCAL_OR_SHUFFLE - - - from: "shunt" - to: "tika" - grouping: - type: LOCAL_OR_SHUFFLE - streamId: "tika" - - - from: "tika" - to: "index" - grouping: - type: LOCAL_OR_SHUFFLE - - - from: "shunt" - to: "index" - grouping: - type: 
LOCAL_OR_SHUFFLE - - - from: "fetcher" - to: "status" - grouping: - type: FIELDS - args: ["url"] - streamId: "status" - - - from: "sitemap" - to: "status" - grouping: - type: FIELDS - args: ["url"] - streamId: "status" - - - from: "parse" - to: "status" - grouping: - type: FIELDS - args: ["url"] - streamId: "status" - - - from: "tika" - to: "status" - grouping: - type: FIELDS - args: ["url"] - streamId: "status" - - - from: "index" - to: "status" - grouping: - type: FIELDS - args: ["url"] - streamId: "status" - - - from: "status" - to: "deleter" - grouping: - type: LOCAL_OR_SHUFFLE - streamId: "deletion" diff --git a/external/elasticsearch/archetype/src/main/resources/archetype-resources/es-injection.flux b/external/elasticsearch/archetype/src/main/resources/archetype-resources/es-injection.flux deleted file mode 100644 index ad0e772b3..000000000 --- a/external/elasticsearch/archetype/src/main/resources/archetype-resources/es-injection.flux +++ /dev/null @@ -1,50 +0,0 @@ -name: "injection" - -includes: - - resource: true - file: "/crawler-default.yaml" - override: false - - - resource: false - file: "crawler-conf.yaml" - override: true - - - resource: false - file: "es-conf.yaml" - override: true - -spouts: - - id: "filespout" - className: "org.apache.stormcrawler.spout.FileSpout" - parallelism: 1 - constructorArgs: - - "." - - "seeds.txt" - - true - -bolts: - - id: "filter" - className: "org.apache.stormcrawler.bolt.URLFilterBolt" - parallelism: 1 - - - id: "status" - className: "org.apache.stormcrawler.elasticsearch.persistence.StatusUpdaterBolt" - parallelism: 1 - -streams: - - from: "filespout" - to: "filter" - grouping: - type: FIELDS - args: ["url"] - streamId: "status" - - - from: "filter" - to: "status" - grouping: - streamId: "status" - type: CUSTOM - customClass: - className: "org.apache.stormcrawler.util.URLStreamGrouping" - constructorArgs: - - "byDomain" diff --git a/external/elasticsearch/archetype/src/main/resources/archetype-resources/kibana/importKibana.sh b/external/elasticsearch/archetype/src/main/resources/archetype-resources/kibana/importKibana.sh deleted file mode 100755 index f0a2edb50..000000000 --- a/external/elasticsearch/archetype/src/main/resources/archetype-resources/kibana/importKibana.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/sh - -BIN=$(dirname $0) - -echo "Importing status dashboard into Kibana" -curl -X POST "localhost:5601/api/saved_objects/_import" -H "kbn-xsrf: true" --form file=@$BIN/status.ndjson -echo "" - -echo "Importing metrics dashboard into Kibana" -curl -X POST "localhost:5601/api/saved_objects/_import" -H "kbn-xsrf: true" --form file=@$BIN/metrics.ndjson -echo "" - -# Storm internal metrics -# curl -X POST "localhost:5601/api/saved_objects/_import" -H "kbn-xsrf: true" --form file=@$BIN/storm.ndjson diff --git a/external/elasticsearch/archetype/src/main/resources/archetype-resources/kibana/metrics.ndjson b/external/elasticsearch/archetype/src/main/resources/archetype-resources/kibana/metrics.ndjson deleted file mode 100644 index 20cbb2bc0..000000000 --- a/external/elasticsearch/archetype/src/main/resources/archetype-resources/kibana/metrics.ndjson +++ /dev/null @@ -1,10 +0,0 @@ 
-{"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"name\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcComponentId\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcTaskId\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcWorkerHost\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcWorkerPort\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"stormId\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"timestamp\",\"type\":\"date\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"value\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true}]","timeFieldName":"timestamp","title":"metrics"},"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","migrationVersion":{"index-pattern":"6.5.0"},"references":[],"type":"index-pattern","updated_at":"2020-01-06T11:06:58.178Z","version":"WzMsMV0="} -{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:activethreads\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Fetcher : # active threads","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"histogram\",\"params\":{\"shareYAxis\":true,\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"mode\":\"grouped\",\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{}},\"aggs\":[{\"id\":\"2\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"3\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"4\",\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"srcTaskId\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"2\"}}],\"listeners\":{},\"title\":\"Fetcher : # active threads\"}"},"id":"Fetcher-:-#-active-threads","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.178Z","version":"WzksMV0="} 
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:num_queues\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Fetcher : num queues","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"histogram\",\"params\":{\"shareYAxis\":true,\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"mode\":\"grouped\",\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{},\"spyPerPage\":10},\"aggs\":[{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"4\",\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"srcTaskId\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"3\"}}],\"listeners\":{},\"title\":\"Fetcher : num queues\"}"},"id":"Fetcher-:-num-queues","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.175Z","version":"WzgsMV0="} -{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:fetcher_counter.fetched\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Fetcher : pages fetched","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"histogram\",\"params\":{\"shareYAxis\":true,\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"mode\":\"grouped\",\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{},\"spyPerPage\":10},\"aggs\":[{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"type\":\"sum\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"4\",\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"srcTaskId\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"3\"}}],\"listeners\":{},\"title\":\"Fetcher : pages fetched\"}"},"id":"Fetcher-:-pages-fetched","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.170Z","version":"WzcsMV0="} -{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:in_queues\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Fetcher : URLs waiting in 
queues","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"histogram\",\"params\":{\"addLegend\":false,\"addTimeMarker\":false,\"addTooltip\":true,\"defaultYExtents\":false,\"mode\":\"grouped\",\"scale\":\"linear\",\"setYExtents\":false,\"shareYAxis\":true,\"spyPerPage\":10,\"times\":[],\"yAxis\":{}},\"aggs\":[{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"4\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"5\",\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"srcTaskId\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"4\"}}],\"listeners\":{},\"title\":\"Fetcher : URLs waiting in queues\"}"},"id":"Fetcher-:-URLs-waiting-in-queues","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.160Z","version":"WzUsMV0="} -{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:fetcher_average_persec.bytes_fetched_perSec\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Fetcher : average bytes per second","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"line\",\"params\":{\"shareYAxis\":true,\"addTooltip\":false,\"addLegend\":false,\"showCircles\":true,\"smoothLines\":true,\"interpolate\":\"linear\",\"scale\":\"linear\",\"drawLinesBetweenPoints\":true,\"radiusRatio\":9,\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{}},\"aggs\":[{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}}],\"listeners\":{},\"title\":\"Fetcher : average bytes per second\"}"},"id":"Fetcher-:-average-bytes-per-second","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.173Z","version":"WzYsMV0="} -{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:fetcher_average_persec.fetched_perSec\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Fetcher : average pages per second","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"line\",\"params\":{\"shareYAxis\":true,\"addTooltip\":false,\"addLegend\":false,\"showCircles\":true,\"smoothLines\":true,\"interpolate\":\"linear\",\"scale\":\"linear\",\"drawLinesBetweenPoints\":true,\"radiusRatio\":9,\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{}},\"aggs\":[{\"id\":\"2\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"3\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}}],\"listeners\":{},\"title\":\"Fetcher : average pages per 
second\"}"},"id":"Fetcher-:-average-pages-per-second","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.820Z","version":"WzEwLDFd"} -{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:fetcher_counter.bytes_fetched\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Total bytes fetched","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"line\",\"params\":{\"shareYAxis\":true,\"addTooltip\":true,\"addLegend\":false,\"showCircles\":true,\"smoothLines\":true,\"interpolate\":\"linear\",\"scale\":\"linear\",\"drawLinesBetweenPoints\":true,\"radiusRatio\":9,\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{}},\"aggs\":[{\"id\":\"1\",\"type\":\"sum\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"m\",\"min_doc_count\":1,\"extended_bounds\":{}}}],\"listeners\":{},\"title\":\"Total bytes fetched\"}"},"id":"Total-bytes-fetched","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.823Z","version":"WzExLDFd"} -{"attributes":{"description":"","hits":0,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"filter\":[],\"query\":{\"query\":{\"query_string\":{\"analyze_wildcard\":true,\"query\":\"*\"}},\"language\":\"lucene\"}}"},"optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":24,\"y\":20,\"w\":12,\"h\":12,\"i\":\"1\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_0\"},{\"panelIndex\":\"2\",\"gridData\":{\"x\":12,\"y\":20,\"w\":12,\"h\":12,\"i\":\"2\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_1\"},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":0,\"w\":36,\"h\":12,\"i\":\"3\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_2\"},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":20,\"w\":12,\"h\":12,\"i\":\"4\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_3\"},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":40,\"w\":36,\"h\":8,\"i\":\"5\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_4\"},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":32,\"w\":36,\"h\":8,\"i\":\"6\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_5\"},{\"panelIndex\":\"7\",\"gridData\":{\"x\":0,\"y\":12,\"w\":36,\"h\":8,\"i\":\"7\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_6\"}]","timeRestore":false,"title":"Crawl 
metrics","version":1},"id":"Crawl-metrics","migrationVersion":{"dashboard":"7.3.0"},"references":[{"id":"Fetcher-:-#-active-threads","name":"panel_0","type":"visualization"},{"id":"Fetcher-:-num-queues","name":"panel_1","type":"visualization"},{"id":"Fetcher-:-pages-fetched","name":"panel_2","type":"visualization"},{"id":"Fetcher-:-URLs-waiting-in-queues","name":"panel_3","type":"visualization"},{"id":"Fetcher-:-average-bytes-per-second","name":"panel_4","type":"visualization"},{"id":"Fetcher-:-average-pages-per-second","name":"panel_5","type":"visualization"},{"id":"Total-bytes-fetched","name":"panel_6","type":"visualization"}],"type":"dashboard","updated_at":"2020-01-06T11:06:58.830Z","version":"WzQsMV0="} -{"exportedCount":9,"missingRefCount":0,"missingReferences":[]} diff --git a/external/elasticsearch/archetype/src/main/resources/archetype-resources/kibana/status.ndjson b/external/elasticsearch/archetype/src/main/resources/archetype-resources/kibana/status.ndjson deleted file mode 100644 index b3d0122e4..000000000 --- a/external/elasticsearch/archetype/src/main/resources/archetype-resources/kibana/status.ndjson +++ /dev/null @@ -1,5 +0,0 @@ -{"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"key\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"metadata._redirTo\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"metadata.depth\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"metadata.error%2Ecause\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"metadata.error%2Esource\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"metadata.fetch%2Eerror%2Ecount\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"metadata.isFeed\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"metadata.isSitemap\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"metadata.url%2Epath\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"nextFetchDate\",\"type\":\"date\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"status\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true
,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"url\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":true,\"readFromDocValues\":true}]","title":"status"},"id":"7445c390-7339-11e9-9289-ffa3ee6775e4","migrationVersion":{"index-pattern":"6.5.0"},"references":[],"type":"index-pattern","updated_at":"2020-01-06T11:07:47.130Z","version":"WzEzLDFd"} -{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"*\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"status count","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMeticsAtAllLevels\":false},\"aggs\":[{\"id\":\"1\",\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"status\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"listeners\":{},\"title\":\"status count\"}"},"id":"status-count","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"7445c390-7339-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:07:48.278Z","version":"WzE1LDFd"} -{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"*\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Top Hosts","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMeticsAtAllLevels\":false},\"aggs\":[{\"id\":\"1\",\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"key\",\"size\":50,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"listeners\":{},\"title\":\"Top Hosts\"}"},"id":"Top-Hosts","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"7445c390-7339-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:07:48.281Z","version":"WzE2LDFd"} -{"attributes":{"description":"","hits":0,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"filter\":[],\"query\":{\"query\":\"*\",\"language\":\"lucene\"}}"},"optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","panelsJSON":"[{\"panelRefName\":\"panel_0\",\"version\":\"7.3.0\",\"panelIndex\":\"37874bbf-6607-435a-a231-94d81e9193e7\",\"gridData\":{\"x\":0,\"y\":0,\"w\":16,\"h\":20,\"i\":\"37874bbf-6607-435a-a231-94d81e9193e7\"},\"embeddableConfig\":{}},{\"panelRefName\":\"panel_1\",\"version\":\"7.3.0\",\"panelIndex\":\"4faa5b74-1660-44f7-9227-89d900c8231e\",\"gridData\":{\"x\":16,\"y\":0,\"w\":16,\"h\":20,\"i\":\"4faa5b74-1660-44f7-9227-89d900c8231e\"},\"embeddableConfig\":{}}]","timeRestore":false,"title":"Crawl status","version":1},"id":"Crawl-status","migrationVersion":{"dashboard":"7.3.0"},"references":[{"id":"status-count","name":"panel_0","type":"visualization"},{"id":"Top-Hosts","name":"panel_1","type":"visualization"}],"type":"dashboard","updated_at":"2020-01-06T11:07:47.948Z","version":"WzE0LDFd"} -{"exportedCount":4,"missingRefCount":0,"missingReferences":[]} diff --git 
a/external/elasticsearch/archetype/src/main/resources/archetype-resources/kibana/storm.ndjson b/external/elasticsearch/archetype/src/main/resources/archetype-resources/kibana/storm.ndjson deleted file mode 100644 index 880c2326f..000000000 --- a/external/elasticsearch/archetype/src/main/resources/archetype-resources/kibana/storm.ndjson +++ /dev/null @@ -1,5 +0,0 @@ -{"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"name\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcComponentId\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcTaskId\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcWorkerHost\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcWorkerPort\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"stormId\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"timestamp\",\"type\":\"date\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"value\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true}]","timeFieldName":"timestamp","title":"metrics"},"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","migrationVersion":{"index-pattern":"6.5.0"},"references":[],"type":"index-pattern","updated_at":"2020-01-06T11:06:58.178Z","version":"WzMsMV0="} -{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name: \\\"__receive.population\\\"\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Storm Receive 
Queues","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMeticsAtAllLevels\":false},\"aggs\":[{\"id\":\"1\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"2\",\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"srcTaskId\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}},{\"id\":\"3\",\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"srcComponentId\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"listeners\":{}}"},"id":"Storm-Receive-Queues","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:09:12.875Z","version":"WzIwLDFd"} -{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"+srcComponentId: \\\"__system\\\" +name: memory\\\\/heap*\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Memory Heap","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"histogram\",\"params\":{\"shareYAxis\":true,\"addTooltip\":true,\"addLegend\":true,\"scale\":\"linear\",\"mode\":\"grouped\",\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":true,\"setYExtents\":false,\"yAxis\":{}},\"aggs\":[{\"id\":\"1\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"name\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}},{\"id\":\"4\",\"type\":\"terms\",\"schema\":\"split\",\"params\":{\"field\":\"srcWorkerHost\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\",\"row\":true}}],\"listeners\":{}}"},"id":"Memory-Heap","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:09:12.877Z","version":"WzIxLDFd"} -{"attributes":{"description":"","hits":0,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"filter\":[],\"query\":{\"query\":\"*\",\"language\":\"lucene\"}}"},"optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","panelsJSON":"[{\"panelRefName\":\"panel_0\",\"version\":\"7.3.0\",\"panelIndex\":\"19123ee9-8f49-4621-a4dc-716b5ff9fcaf\",\"gridData\":{\"x\":0,\"y\":0,\"w\":32,\"h\":8,\"i\":\"19123ee9-8f49-4621-a4dc-716b5ff9fcaf\"},\"embeddableConfig\":{}},{\"panelRefName\":\"panel_1\",\"version\":\"7.3.0\",\"panelIndex\":\"5fd83542-b7e6-48e0-8679-2ffcacf453a3\",\"gridData\":{\"x\":0,\"y\":8,\"w\":32,\"h\":16,\"i\":\"5fd83542-b7e6-48e0-8679-2ffcacf453a3\"},\"embeddableConfig\":{}}]","timeRestore":false,"title":"Storm metrics","version":1},"id":"Storm-metrics","migrationVersion":{"dashboard":"7.3.0"},"references":[{"id":"Storm-Receive-Queues","name":"panel_0","type":"visualization"},{"id":"Memory-Heap","name":"panel_1","type":"visualization"}],"type":"dashboard","updated_at":"2020-01-06T11:08:33.810Z","version":"WzE5LDFd"} -{"exportedCount":4,"missingRefCount":0,"missingReferences":[]} \ No newline at end of file diff --git 
a/external/elasticsearch/archetype/src/main/resources/archetype-resources/pom.xml b/external/elasticsearch/archetype/src/main/resources/archetype-resources/pom.xml deleted file mode 100644 index cca05f97b..000000000 --- a/external/elasticsearch/archetype/src/main/resources/archetype-resources/pom.xml +++ /dev/null @@ -1,149 +0,0 @@ - - - - - - - 4.0.0 - ${groupId} - ${artifactId} - ${version} - jar - - ${artifactId} - - - UTF-8 - ${StormCrawlerVersion} - 2.6.1 - - - - - - org.apache.maven.plugins - maven-compiler-plugin - 3.11.0 - - 11 - 11 - - - - org.codehaus.mojo - exec-maven-plugin - 3.1.0 - - - - exec - - - - - java - true - false - compile - - - - org.apache.maven.plugins - maven-shade-plugin - 3.5.0 - - - package - - shade - - - false - - - - org.apache.storm.flux.Flux - - - - - - - - - - *:* - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - org.apache.storm:flux-core - - org/apache/commons/** - org/apache/http/** - org/yaml/** - - - - - - - - - - - - - org.apache.stormcrawler - storm-crawler-core - ${stormcrawler.version} - - - org.apache.stormcrawler - storm-crawler-elasticsearch - ${stormcrawler.version} - - - org.apache.storm - storm-client - ${storm.version} - provided - - - org.apache.storm - flux-core - ${storm.version} - - - org.apache.stormcrawler - storm-crawler-tika - ${stormcrawler.version} - - - diff --git a/external/elasticsearch/archetype/src/main/resources/archetype-resources/src/main/java/ESCrawlTopology.java b/external/elasticsearch/archetype/src/main/resources/archetype-resources/src/main/java/ESCrawlTopology.java deleted file mode 100644 index c9eeaf713..000000000 --- a/external/elasticsearch/archetype/src/main/resources/archetype-resources/src/main/java/ESCrawlTopology.java +++ /dev/null @@ -1,110 +0,0 @@ -#set($symbol_pound='#')#set($symbol_dollar='$')#set($symbol_escape='\') - -/** - * Licensed to DigitalPebble Ltd under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * DigitalPebble licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package ${package}; - -import org.apache.storm.metric.LoggingMetricsConsumer; -import org.apache.storm.topology.TopologyBuilder; -import org.apache.storm.tuple.Fields; - -import org.apache.stormcrawler.ConfigurableTopology; -import org.apache.stormcrawler.Constants; -import org.apache.stormcrawler.bolt.FetcherBolt; -import org.apache.stormcrawler.bolt.JSoupParserBolt; -import org.apache.stormcrawler.bolt.SiteMapParserBolt; -import org.apache.stormcrawler.bolt.URLFilterBolt; -import org.apache.stormcrawler.bolt.URLPartitionerBolt; -import org.apache.stormcrawler.elasticsearch.bolt.DeletionBolt; -import org.apache.stormcrawler.elasticsearch.bolt.IndexerBolt; -import org.apache.stormcrawler.elasticsearch.metrics.MetricsConsumer; -import org.apache.stormcrawler.elasticsearch.metrics.StatusMetricsBolt; -import org.apache.stormcrawler.elasticsearch.persistence.AggregationSpout; -import org.apache.stormcrawler.elasticsearch.persistence.StatusUpdaterBolt; -import org.apache.stormcrawler.spout.FileSpout; -import org.apache.stormcrawler.util.ConfUtils; -import org.apache.stormcrawler.util.URLStreamGrouping; -import org.apache.stormcrawler.tika.ParserBolt; -import org.apache.stormcrawler.tika.RedirectionBolt; - -/** - * Dummy topology to play with the spouts and bolts on ElasticSearch - */ -public class ESCrawlTopology extends ConfigurableTopology { - - public static void main(String[] args) throws Exception { - ConfigurableTopology.start(new ESCrawlTopology(), args); - } - - @Override - protected int run(String[] args) { - TopologyBuilder builder = new TopologyBuilder(); - - int numWorkers = ConfUtils.getInt(getConf(), "topology.workers", 1); - - if (args.length == 0) { - System.err.println("ESCrawlTopology seed_dir file_filter"); - return -1; - } - - // set to the real number of shards ONLY if es.status.routing is set to - // true in the configuration - int numShards = 1; - - builder.setSpout("filespout", new FileSpout(args[0], args[1], true)); - - Fields key = new Fields("url"); - - builder.setBolt("filter", new URLFilterBolt()).fieldsGrouping("filespout", Constants.StatusStreamName, key); - - builder.setSpout("spout", new AggregationSpout(), numShards); - - builder.setBolt("status_metrics", new StatusMetricsBolt()).shuffleGrouping("spout"); - - builder.setBolt("partitioner", new URLPartitionerBolt(), numWorkers).shuffleGrouping("spout"); - - builder.setBolt("fetch", new FetcherBolt(), numWorkers).fieldsGrouping("partitioner", new Fields("key")); - - builder.setBolt("sitemap", new SiteMapParserBolt(), numWorkers).localOrShuffleGrouping("fetch"); - - builder.setBolt("parse", new JSoupParserBolt(), numWorkers).localOrShuffleGrouping("sitemap"); - - builder.setBolt("shunt", new RedirectionBolt()).localOrShuffleGrouping("parse"); - - builder.setBolt("tika", new ParserBolt()).localOrShuffleGrouping("shunt", "tika"); - - builder.setBolt("indexer", new IndexerBolt(), numWorkers).localOrShuffleGrouping("shunt") - .localOrShuffleGrouping("tika"); - - builder.setBolt("status", new StatusUpdaterBolt(), numWorkers) - .fieldsGrouping("fetch", Constants.StatusStreamName, key) - .fieldsGrouping("sitemap", Constants.StatusStreamName, key) - .fieldsGrouping("parse", Constants.StatusStreamName, key) - .fieldsGrouping("tika", Constants.StatusStreamName, key) - .fieldsGrouping("indexer", Constants.StatusStreamName, key) - .customGrouping("filter", Constants.StatusStreamName, new URLStreamGrouping()); - - builder.setBolt("deleter", new DeletionBolt(), numWorkers).localOrShuffleGrouping("status", - 
Constants.DELETION_STREAM_NAME); - - conf.registerMetricsConsumer(MetricsConsumer.class); - conf.registerMetricsConsumer(LoggingMetricsConsumer.class); - - return submit("crawl", conf, builder); - } -} diff --git a/external/elasticsearch/archetype/src/main/resources/archetype-resources/src/main/resources/default-regex-filters.txt b/external/elasticsearch/archetype/src/main/resources/archetype-resources/src/main/resources/default-regex-filters.txt deleted file mode 100644 index 389ef587b..000000000 --- a/external/elasticsearch/archetype/src/main/resources/archetype-resources/src/main/resources/default-regex-filters.txt +++ /dev/null @@ -1,32 +0,0 @@ -# skip file: ftp: and mailto: urls --^(file|ftp|mailto): - -# skip image and other suffixes we can't parse or are not likely to be relevant -# if you want to crawl images or videos or archives then you should comment out this line --(?i)\.(apk|deb|cab|iso|gif|jpg|png|svg|ico|css|sit|eps|wmf|rar|tar|jar|zip|gz|bz2|rpm|tgz|mov|exe|jpeg|jpe|bmp|js|mpg|mp3|mp4|m4a|ogv|kml|wmv|swf|flv|mkv|m4v|webm|ra|wma|wav|avi|xspf|m3u)(\?|&|$) - -# skip URLs with slash-delimited segment that repeats 3+ times, to break loops -# very time-consuming : use BasicURLFilter instead -# -.*(/[^/]+)/[^/]+\1/[^/]+\1/ - -# exclude localhost and equivalents to avoid that information -# can be leaked by placing faked links pointing to web interfaces -# of services running on the crawling machine (e.g., Elasticsearch, -# Storm) -# -# - exclude localhost and loop-back addresses -# http://localhost:8080 -# http://127.0.0.1/ .. http://127.255.255.255/ -# http://[::1]/ --^https?://(?:localhost|127(?:\.(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))){3}|\[::1\])(?::\d+)?(?:/|$) -# -# - exclude private IP address spaces -# 10.0.0.0/8 --^https?://(?:10(?:\.(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))){3})(?::\d+)?(?:/|$) -# 192.168.0.0/16 --^https?://(?:192\.168(?:\.(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))){2})(?::\d+)?(?:/|$) -# 172.16.0.0/12 --^https?://(?:172\.(?:1[6789]|2[0-9]|3[01])(?:\.(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))){2})(?::\d+)?(?:/|$) - -# accept anything else -+. 
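The regex rules removed above are evaluated in order: the first `-` rule that matches drops the URL, and anything that reaches the final `+.` rule is kept. A small, self-contained illustration of the localhost/loop-back exclusion pattern (the class name and sample URLs are made up for the example):

```java
import java.util.regex.Pattern;

public class RegexFilterSketch {
    // Same pattern as the "-" rule above that excludes localhost and loop-back addresses,
    // with backslashes doubled for a Java string literal.
    private static final Pattern LOCALHOST = Pattern.compile(
            "^https?://(?:localhost|127(?:\\.(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))){3}|\\[::1\\])(?::\\d+)?(?:/|$)");

    public static void main(String[] args) {
        // A match against a "-" rule means the URL is filtered out of the crawl.
        System.out.println(LOCALHOST.matcher("http://localhost:9200/status").find());   // true  -> dropped
        System.out.println(LOCALHOST.matcher("https://example.com/index.html").find()); // false -> kept
    }
}
```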
diff --git a/external/elasticsearch/archetype/src/main/resources/archetype-resources/src/main/resources/default-regex-normalizers.xml b/external/elasticsearch/archetype/src/main/resources/archetype-resources/src/main/resources/default-regex-normalizers.xml deleted file mode 100644 index 101bfd6b5..000000000 --- a/external/elasticsearch/archetype/src/main/resources/archetype-resources/src/main/resources/default-regex-normalizers.xml +++ /dev/null @@ -1,78 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/external/elasticsearch/archetype/src/main/resources/archetype-resources/src/main/resources/jsoupfilters.json b/external/elasticsearch/archetype/src/main/resources/archetype-resources/src/main/resources/jsoupfilters.json deleted file mode 100644 index 4d87d8d5a..000000000 --- a/external/elasticsearch/archetype/src/main/resources/archetype-resources/src/main/resources/jsoupfilters.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "org.apache.stormcrawler.parse.JSoupFilters": [ - { - "class": "org.apache.stormcrawler.jsoup.XPathFilter", - "name": "XPathFilter", - "params": { - "canonical": "//*[@rel=\"canonical\"]/@href", - "parse.description": [ - "//*[@name=\"description\"]/@content", - "//*[@name=\"Description\"]/@content" - ], - "parse.title": [ - "//TITLE/allText()", - "//META[@name=\"title\"]/@content" - ], - "parse.keywords": "//META[@name=\"keywords\"]/@content" - } - }, - { - "class": "org.apache.stormcrawler.jsoup.LinkParseFilter", - "name": "LinkParseFilter", - "params": { - "pattern": "//FRAME/@src" - } - } - ] -} diff --git a/external/elasticsearch/archetype/src/main/resources/archetype-resources/src/main/resources/parsefilters.json b/external/elasticsearch/archetype/src/main/resources/archetype-resources/src/main/resources/parsefilters.json deleted file mode 100644 index 5d525830d..000000000 --- a/external/elasticsearch/archetype/src/main/resources/archetype-resources/src/main/resources/parsefilters.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "org.apache.stormcrawler.parse.ParseFilters": [ - { - "class": "org.apache.stormcrawler.parse.filter.DomainParseFilter", - "name": "DomainParseFilter", - "params": { - "key": "domain", - "byHost": false - } - }, - { - "class": "org.apache.stormcrawler.parse.filter.MimeTypeNormalization", - "name": "MimeTypeNormalization" - }, - { - "class": "org.apache.stormcrawler.parse.filter.CommaSeparatedToMultivaluedMetadata", - "name": "CommaSeparatedToMultivaluedMetadata", - "params": { - "keys": ["parse.keywords"] - } - } - ] -} diff --git a/external/elasticsearch/archetype/src/main/resources/archetype-resources/src/main/resources/urlfilters.json b/external/elasticsearch/archetype/src/main/resources/archetype-resources/src/main/resources/urlfilters.json deleted file mode 100644 index 6098631bb..000000000 --- a/external/elasticsearch/archetype/src/main/resources/archetype-resources/src/main/resources/urlfilters.json +++ /dev/null @@ -1,60 +0,0 @@ -{ - "org.apache.stormcrawler.filtering.URLFilters": [ - { - "class": "org.apache.stormcrawler.filtering.basic.BasicURLFilter", - "name": "BasicURLFilter", - "params": { - "maxPathRepetition": 3, - "maxLength": 1024 - } - }, - { - "class": "org.apache.stormcrawler.filtering.depth.MaxDepthFilter", - "name": "MaxDepthFilter", - "params": { - "maxDepth": -1 - } - }, - { - "class": "org.apache.stormcrawler.filtering.basic.BasicURLNormalizer", - "name": "BasicURLNormalizer", - "params": { - "removeAnchorPart": true, - "unmangleQueryString": true, - 
"checkValidURI": true, - "removeHashes": true, - "hostIDNtoASCII": true - } - }, - { - "class": "org.apache.stormcrawler.filtering.host.HostURLFilter", - "name": "HostURLFilter", - "params": { - "ignoreOutsideHost": false, - "ignoreOutsideDomain": true - } - }, - { - "class": "org.apache.stormcrawler.filtering.regex.RegexURLNormalizer", - "name": "RegexURLNormalizer", - "params": { - "regexNormalizerFile": "default-regex-normalizers.xml" - } - }, - { - "class": "org.apache.stormcrawler.filtering.regex.RegexURLFilter", - "name": "RegexURLFilter", - "params": { - "regexFilterFile": "default-regex-filters.txt" - } - }, - { - "class": "org.apache.stormcrawler.filtering.basic.SelfURLFilter", - "name": "SelfURLFilter" - }, - { - "class": "org.apache.stormcrawler.filtering.sitemap.SitemapFilter", - "name": "SitemapFilter" - } - ] -} diff --git a/external/elasticsearch/kibana b/external/elasticsearch/kibana deleted file mode 120000 index 26554b3bc..000000000 --- a/external/elasticsearch/kibana +++ /dev/null @@ -1 +0,0 @@ -archetype/src/main/resources/archetype-resources/kibana \ No newline at end of file diff --git a/external/elasticsearch/pom.xml b/external/elasticsearch/pom.xml deleted file mode 100644 index dc5f06158..000000000 --- a/external/elasticsearch/pom.xml +++ /dev/null @@ -1,93 +0,0 @@ - - - - - - 4.0.0 - - - org.apache.stormcrawler - storm-crawler-external - 2.12-SNAPSHOT - ../pom.xml - - - - 7.17.7 - - - storm-crawler-elasticsearch - jar - - storm-crawler-elasticsearch - https://github.com/apache/incubator-stormcrawler/tree/master/external/elasticsearch - Elasticsearch resources for StormCrawler - - - - - maven-surefire-plugin - - - default-test - test - - test - - - - - - ${elasticsearch.version} - - - - - - - - - org.elasticsearch.client - elasticsearch-rest-high-level-client - ${elasticsearch.version} - - - org.elasticsearch.client - elasticsearch-rest-client-sniffer - ${elasticsearch.version} - - - - - org.testcontainers - elasticsearch - test - - - - org.apache.stormcrawler - storm-crawler-core - ${project.version} - test-jar - test - - - - diff --git a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/BulkItemResponseToFailedFlag.java b/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/BulkItemResponseToFailedFlag.java deleted file mode 100644 index d9492215e..000000000 --- a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/BulkItemResponseToFailedFlag.java +++ /dev/null @@ -1,113 +0,0 @@ -package org.apache.stormcrawler.elasticsearch; - -import java.io.IOException; -import org.elasticsearch.action.DocWriteRequest; -import org.elasticsearch.action.DocWriteResponse; -import org.elasticsearch.action.bulk.BulkItemResponse; -import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.rest.RestStatus; -import org.elasticsearch.xcontent.ToXContent; -import org.elasticsearch.xcontent.XContentBuilder; -import org.jetbrains.annotations.NotNull; - -public final class BulkItemResponseToFailedFlag { - @NotNull public final BulkItemResponse response; - public final boolean failed; - @NotNull public final String id; - - public BulkItemResponseToFailedFlag(@NotNull BulkItemResponse response, boolean failed) { - this.response = response; - this.failed = failed; - this.id = response.getId(); - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (!(o instanceof BulkItemResponseToFailedFlag)) return false; - - BulkItemResponseToFailedFlag that = 
(BulkItemResponseToFailedFlag) o; - - if (failed != that.failed) return false; - if (!response.equals(that.response)) return false; - return id.equals(that.id); - } - - @Override - public int hashCode() { - int result = response.hashCode(); - result = 31 * result + (failed ? 1 : 0); - result = 31 * result + id.hashCode(); - return result; - } - - @Override - public String toString() { - return "BulkItemResponseToFailedFlag{" - + "response=" - + response - + ", failed=" - + failed - + ", id='" - + id - + '\'' - + '}'; - } - - public RestStatus status() { - return response.status(); - } - - public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) - throws IOException { - return response.toXContent(builder, params); - } - - public int getItemId() { - return response.getItemId(); - } - - public DocWriteRequest.OpType getOpType() { - return response.getOpType(); - } - - public String getIndex() { - return response.getIndex(); - } - - public String getType() { - return response.getType(); - } - - public long getVersion() { - return response.getVersion(); - } - - public T getResponse() { - return response.getResponse(); - } - - public boolean isFailed() { - return response.isFailed(); - } - - public String getFailureMessage() { - return response.getFailureMessage(); - } - - public BulkItemResponse.Failure getFailure() { - return response.getFailure(); - } - - public void writeTo(StreamOutput out) throws IOException { - response.writeTo(out); - } - - public void writeThin(StreamOutput out) throws IOException { - response.writeThin(out); - } - - public boolean isFragment() { - return response.isFragment(); - } -} diff --git a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/ElasticSearchConnection.java b/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/ElasticSearchConnection.java deleted file mode 100644 index 05e435f86..000000000 --- a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/ElasticSearchConnection.java +++ /dev/null @@ -1,297 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to you under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.stormcrawler.elasticsearch; - -import static org.elasticsearch.client.RestClientBuilder.DEFAULT_CONNECT_TIMEOUT_MILLIS; -import static org.elasticsearch.client.RestClientBuilder.DEFAULT_SOCKET_TIMEOUT_MILLIS; - -import java.io.IOException; -import java.net.URI; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.concurrent.TimeUnit; -import org.apache.http.HttpHost; -import org.apache.http.auth.AuthScope; -import org.apache.http.auth.UsernamePasswordCredentials; -import org.apache.http.client.CredentialsProvider; -import org.apache.http.impl.client.BasicCredentialsProvider; -import org.apache.storm.shade.org.apache.commons.lang.StringUtils; -import org.apache.stormcrawler.util.ConfUtils; -import org.elasticsearch.action.bulk.BulkProcessor; -import org.elasticsearch.action.bulk.BulkRequest; -import org.elasticsearch.action.bulk.BulkResponse; -import org.elasticsearch.action.index.IndexRequest; -import org.elasticsearch.client.Node; -import org.elasticsearch.client.RequestOptions; -import org.elasticsearch.client.RestClient; -import org.elasticsearch.client.RestClientBuilder; -import org.elasticsearch.client.RestHighLevelClient; -import org.elasticsearch.client.RestHighLevelClientBuilder; -import org.elasticsearch.client.sniff.Sniffer; -import org.elasticsearch.core.TimeValue; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** Utility class to instantiate an ES client and bulkprocessor based on the configuration. */ -public final class ElasticSearchConnection { - - private static final Logger LOG = LoggerFactory.getLogger(ElasticSearchConnection.class); - - @NotNull private final RestHighLevelClient client; - - @NotNull private final BulkProcessor processor; - - @Nullable private final Sniffer sniffer; - - private ElasticSearchConnection(@NotNull RestHighLevelClient c, @NotNull BulkProcessor p) { - this(c, p, null); - } - - private ElasticSearchConnection( - @NotNull RestHighLevelClient c, @NotNull BulkProcessor p, @Nullable Sniffer s) { - processor = p; - client = c; - sniffer = s; - } - - public RestHighLevelClient getClient() { - return client; - } - - public void addToProcessor(final IndexRequest request) { - processor.add(request); - } - - public static RestHighLevelClient getClient(Map stormConf, String boltType) { - - List confighosts = - ConfUtils.loadListFromConf("es." + boltType + ".addresses", stormConf); - - List hosts = new ArrayList<>(); - - for (String host : confighosts) { - // no port specified? use default one - int port = 9200; - String scheme = "http"; - // no scheme specified? use http - if (!host.startsWith(scheme)) { - host = "http://" + host; - } - URI uri = URI.create(host); - if (uri.getHost() == null) { - throw new RuntimeException("host undefined " + host); - } - if (uri.getPort() != -1) { - port = uri.getPort(); - } - if (uri.getScheme() != null) { - scheme = uri.getScheme(); - } - hosts.add(new HttpHost(uri.getHost(), port, scheme)); - } - - RestClientBuilder builder = RestClient.builder(hosts.toArray(new HttpHost[0])); - - // authentication via user / password - String user = ConfUtils.getString(stormConf, "es." + boltType + ".user"); - String password = ConfUtils.getString(stormConf, "es." + boltType + ".password"); - - String proxyhost = ConfUtils.getString(stormConf, "es." + boltType + ".proxy.host"); - - int proxyport = ConfUtils.getInt(stormConf, "es." 
+ boltType + ".proxy.port", -1); - - String proxyscheme = - ConfUtils.getString(stormConf, "es." + boltType + ".proxy.scheme", "http"); - - boolean needsUser = StringUtils.isNotBlank(user) && StringUtils.isNotBlank(password); - boolean needsProxy = StringUtils.isNotBlank(proxyhost) && proxyport != -1; - - if (needsUser || needsProxy) { - builder.setHttpClientConfigCallback( - httpClientBuilder -> { - if (needsUser) { - final CredentialsProvider credentialsProvider = - new BasicCredentialsProvider(); - credentialsProvider.setCredentials( - AuthScope.ANY, new UsernamePasswordCredentials(user, password)); - httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider); - } - if (needsProxy) { - httpClientBuilder.setProxy( - new HttpHost(proxyhost, proxyport, proxyscheme)); - } - return httpClientBuilder; - }); - } - - int connectTimeout = - ConfUtils.getInt( - stormConf, - "es." + boltType + ".connect.timeout", - DEFAULT_CONNECT_TIMEOUT_MILLIS); - int socketTimeout = - ConfUtils.getInt( - stormConf, - "es." + boltType + ".socket.timeout", - DEFAULT_SOCKET_TIMEOUT_MILLIS); - // timeout until connection is established - builder.setRequestConfigCallback( - requestConfigBuilder -> - requestConfigBuilder - .setConnectTimeout(connectTimeout) - .setSocketTimeout(socketTimeout) // Timeout when waiting - // for data - ); - - // TODO check if this has gone somewhere else in ES 7 - // int maxRetryTimeout = ConfUtils.getInt(stormConf, "es." + boltType + - // ".max.retry.timeout", - // DEFAULT_MAX_RETRY_TIMEOUT_MILLIS); - // builder.setMaxRetryTimeoutMillis(maxRetryTimeout); - - // TODO configure headers etc... - // Map configSettings = (Map) stormConf - // .get("es." + boltType + ".settings"); - // if (configSettings != null) { - // configSettings.forEach((k, v) -> settings.put(k, v)); - // } - - // use node selector only to log nodes listed in the config - // and/or discovered through sniffing - builder.setNodeSelector( - nodes -> { - for (Node node : nodes) { - LOG.debug( - "Connected to ES node {} [{}] for {}", - node.getName(), - node.getHost(), - boltType); - } - }); - - final boolean compression = - ConfUtils.getBoolean(stormConf, "es." + boltType + ".compression", false); - - builder.setCompressionEnabled(compression); - - final boolean compatibilityMode = - ConfUtils.getBoolean(stormConf, "es." + boltType + ".compatibility.mode", false); - - return new RestHighLevelClientBuilder(builder.build()) - .setApiCompatibilityMode(compatibilityMode) - .build(); - } - - /** - * Creates a connection with a default listener. The values for bolt type are - * [indexer,status,metrics] - */ - public static ElasticSearchConnection getConnection( - Map stormConf, String boltType) { - BulkProcessor.Listener listener = - new BulkProcessor.Listener() { - @Override - public void afterBulk(long arg0, BulkRequest arg1, BulkResponse arg2) {} - - @Override - public void afterBulk(long arg0, BulkRequest arg1, Throwable arg2) {} - - @Override - public void beforeBulk(long arg0, BulkRequest arg1) {} - }; - return getConnection(stormConf, boltType, listener); - } - - public static ElasticSearchConnection getConnection( - Map stormConf, String boltType, BulkProcessor.Listener listener) { - - String flushIntervalString = - ConfUtils.getString(stormConf, "es." + boltType + ".flushInterval", "5s"); - - TimeValue flushInterval = - TimeValue.parseTimeValue( - flushIntervalString, TimeValue.timeValueSeconds(5), "flushInterval"); - - int bulkActions = ConfUtils.getInt(stormConf, "es." 
+ boltType + ".bulkActions", 50); - - int concurrentRequests = - ConfUtils.getInt(stormConf, "es." + boltType + ".concurrentRequests", 1); - - RestHighLevelClient client = getClient(stormConf, boltType); - - boolean sniff = ConfUtils.getBoolean(stormConf, "es." + boltType + ".sniff", true); - Sniffer sniffer = null; - if (sniff) { - sniffer = Sniffer.builder(client.getLowLevelClient()).build(); - } - - BulkProcessor bulkProcessor = - BulkProcessor.builder( - (request, bulkListener) -> - client.bulkAsync( - request, RequestOptions.DEFAULT, bulkListener), - listener, - boltType + "-bulk-processor") - .setFlushInterval(flushInterval) - .setBulkActions(bulkActions) - .setConcurrentRequests(concurrentRequests) - .build(); - - return new ElasticSearchConnection(client, bulkProcessor, sniffer); - } - - private boolean isClosed = false; - - public void close() { - - if (isClosed) { - LOG.warn("Tried to close an already closed connection!"); - return; - } - - // Maybe some kind of identifier? - LOG.debug("Start closing the ElasticSearchConnection"); - - // First, close the BulkProcessor ensuring pending actions are flushed - try { - boolean success = processor.awaitClose(60, TimeUnit.SECONDS); - if (!success) { - throw new RuntimeException( - "Failed to flush pending actions when closing BulkProcessor"); - } - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - - if (sniffer != null) { - sniffer.close(); - } - - // Now close the actual client - try { - client.close(); - } catch (IOException e) { - // ignore silently - LOG.trace("Client threw IO exception."); - } - - isClosed = true; - } -} diff --git a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/bolt/DeletionBolt.java b/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/bolt/DeletionBolt.java deleted file mode 100644 index cbdfa9f94..000000000 --- a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/bolt/DeletionBolt.java +++ /dev/null @@ -1,106 +0,0 @@ -package org.apache.stormcrawler.elasticsearch.bolt; - -import java.io.IOException; -import java.lang.invoke.MethodHandles; -import java.util.Map; -import org.apache.storm.task.OutputCollector; -import org.apache.storm.task.TopologyContext; -import org.apache.storm.topology.OutputFieldsDeclarer; -import org.apache.storm.topology.base.BaseRichBolt; -import org.apache.storm.tuple.Tuple; -import org.apache.stormcrawler.Metadata; -import org.apache.stormcrawler.elasticsearch.ElasticSearchConnection; -import org.apache.stormcrawler.util.ConfUtils; -import org.elasticsearch.action.delete.DeleteRequest; -import org.elasticsearch.client.RequestOptions; -import org.elasticsearch.client.RestHighLevelClient; -import org.slf4j.LoggerFactory; - -/** - * Deletes documents to ElasticSearch. This should be connected to the StatusUpdaterBolt via the - * 'deletion' stream and will remove the documents with a status of ERROR one by one. Note that this - * component will also try to delete documents even though they were never indexed and it currently - * won't delete documents which were indexed under the canonical URL. 
- */ -public class DeletionBolt extends BaseRichBolt { - - static final org.slf4j.Logger LOG = - LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - - private static final String ESBoltType = "indexer"; - - private OutputCollector _collector; - - private String indexName; - - private RestHighLevelClient client; - - public DeletionBolt() {} - - /** Sets the index name instead of taking it from the configuration. * */ - public DeletionBolt(String indexName) { - this.indexName = indexName; - } - - @Override - public void prepare( - Map conf, TopologyContext context, OutputCollector collector) { - _collector = collector; - if (indexName == null) { - indexName = ConfUtils.getString(conf, IndexerBolt.ESIndexNameParamName, "content"); - } - client = ElasticSearchConnection.getClient(conf, ESBoltType); - } - - @Override - public void cleanup() { - if (client != null) - try { - client.close(); - } catch (IOException e) { - } - } - - @Override - public void execute(Tuple tuple) { - String url = tuple.getStringByField("url"); - Metadata metadata = (Metadata) tuple.getValueByField("metadata"); - - // keep it simple for now and ignore cases where the canonical URL was - // used - String docID = getDocumentID(metadata, url); - DeleteRequest dr = new DeleteRequest(getIndexName(metadata), docID); - try { - client.delete(dr, RequestOptions.DEFAULT); - } catch (IOException e) { - _collector.fail(tuple); - LOG.error("Exception caught while deleting", e); - return; - } - _collector.ack(tuple); - } - - /** - * Get the document id. - * - * @param metadata The {@link Metadata}. - * @param url The normalised url. - * @return Return the normalised url SHA-256 digest as String. - */ - protected String getDocumentID(Metadata metadata, String url) { - return org.apache.commons.codec.digest.DigestUtils.sha256Hex(url); - } - - @Override - public void declareOutputFields(OutputFieldsDeclarer arg0) { - // none - } - - /** - * Must be overridden for implementing custom index names based on some metadata information By - * Default, indexName coming from config is used - */ - protected String getIndexName(Metadata m) { - return indexName; - } -} diff --git a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/bolt/IndexerBolt.java b/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/bolt/IndexerBolt.java deleted file mode 100644 index e0a86fa82..000000000 --- a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/bolt/IndexerBolt.java +++ /dev/null @@ -1,454 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to you under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.stormcrawler.elasticsearch.bolt; - -import static org.apache.stormcrawler.Constants.StatusStreamName; -import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder; - -import com.github.benmanes.caffeine.cache.Cache; -import com.github.benmanes.caffeine.cache.Caffeine; -import com.github.benmanes.caffeine.cache.RemovalCause; -import com.github.benmanes.caffeine.cache.RemovalListener; -import java.io.IOException; -import java.util.*; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.locks.ReentrantLock; -import java.util.stream.Collectors; -import org.apache.commons.lang.StringUtils; -import org.apache.storm.metric.api.MultiCountMetric; -import org.apache.storm.metric.api.MultiReducedMetric; -import org.apache.storm.task.OutputCollector; -import org.apache.storm.task.TopologyContext; -import org.apache.storm.tuple.Tuple; -import org.apache.storm.tuple.Values; -import org.apache.stormcrawler.Constants; -import org.apache.stormcrawler.Metadata; -import org.apache.stormcrawler.elasticsearch.BulkItemResponseToFailedFlag; -import org.apache.stormcrawler.elasticsearch.ElasticSearchConnection; -import org.apache.stormcrawler.indexing.AbstractIndexerBolt; -import org.apache.stormcrawler.persistence.Status; -import org.apache.stormcrawler.util.ConfUtils; -import org.apache.stormcrawler.util.PerSecondReducer; -import org.elasticsearch.action.DocWriteRequest; -import org.elasticsearch.action.bulk.BulkItemResponse; -import org.elasticsearch.action.bulk.BulkProcessor; -import org.elasticsearch.action.bulk.BulkRequest; -import org.elasticsearch.action.bulk.BulkResponse; -import org.elasticsearch.action.index.IndexRequest; -import org.elasticsearch.rest.RestStatus; -import org.elasticsearch.xcontent.XContentBuilder; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Sends documents to ElasticSearch. Indexes all the fields from the tuples or a Map - * <String,Object> from a named field. - */ -public class IndexerBolt extends AbstractIndexerBolt - implements RemovalListener>, BulkProcessor.Listener { - - private static final Logger LOG = LoggerFactory.getLogger(IndexerBolt.class); - - private static final String ESBoltType = "indexer"; - - static final String ESIndexNameParamName = "es.indexer.index.name"; - private static final String ESCreateParamName = "es.indexer.create"; - private static final String ESIndexPipelineParamName = "es.indexer.pipeline"; - - private OutputCollector _collector; - - private String indexName; - - private String pipeline; - - // whether the document will be created only if it does not exist or - // overwritten - private boolean create = false; - - private MultiCountMetric eventCounter; - - private ElasticSearchConnection connection; - - private MultiReducedMetric perSecMetrics; - - private Cache> waitAck; - - // Be fair due to cache timeout - private final ReentrantLock waitAckLock = new ReentrantLock(true); - - public IndexerBolt() {} - - /** Sets the index name instead of taking it from the configuration. 
* */ - public IndexerBolt(String indexName) { - this.indexName = indexName; - } - - @Override - public void prepare( - Map conf, TopologyContext context, OutputCollector collector) { - super.prepare(conf, context, collector); - _collector = collector; - if (indexName == null) { - indexName = ConfUtils.getString(conf, IndexerBolt.ESIndexNameParamName, "content"); - } - - create = ConfUtils.getBoolean(conf, IndexerBolt.ESCreateParamName, false); - pipeline = ConfUtils.getString(conf, IndexerBolt.ESIndexPipelineParamName); - - try { - connection = ElasticSearchConnection.getConnection(conf, ESBoltType, this); - } catch (Exception e1) { - LOG.error("Can't connect to ElasticSearch", e1); - throw new RuntimeException(e1); - } - - this.eventCounter = - context.registerMetric("ElasticSearchIndexer", new MultiCountMetric(), 10); - - this.perSecMetrics = - context.registerMetric( - "Indexer_average_persec", - new MultiReducedMetric(new PerSecondReducer()), - 10); - - waitAck = - Caffeine.newBuilder() - .expireAfterWrite(60, TimeUnit.SECONDS) - .removalListener(this) - .build(); - - context.registerMetric("waitAck", () -> waitAck.estimatedSize(), 10); - } - - public void onRemoval( - @Nullable String key, @Nullable List value, @NotNull RemovalCause cause) { - if (!cause.wasEvicted()) return; - if (value != null) { - LOG.error("Purged from waitAck {} with {} values", key, value.size()); - for (Tuple t : value) { - _collector.fail(t); - } - } else { - // This should never happen, but log it anyway. - LOG.error("Purged from waitAck {} with no values", key); - } - } - - @Override - public void cleanup() { - if (connection != null) connection.close(); - } - - @Override - public void execute(Tuple tuple) { - - String url = tuple.getStringByField("url"); - - // Distinguish the value used for indexing - // from the one used for the status - String normalisedurl = valueForURL(tuple); - - LOG.info("Indexing {} as {}", url, normalisedurl); - - Metadata metadata = (Metadata) tuple.getValueByField("metadata"); - - boolean keep = filterDocument(metadata); - if (!keep) { - LOG.info("Filtered {}", url); - eventCounter.scope("Filtered").incrBy(1); - // treat it as successfully processed even if - // we do not index it - _collector.emit(StatusStreamName, tuple, new Values(url, metadata, Status.FETCHED)); - _collector.ack(tuple); - return; - } - - String docID = getDocumentID(metadata, normalisedurl); - - try { - XContentBuilder builder = jsonBuilder().startObject(); - - // display text of the document? - if (StringUtils.isNotBlank(fieldNameForText())) { - final String text = trimText(tuple.getStringByField("text")); - if (!ignoreEmptyFields() || StringUtils.isNotBlank(text)) { - builder.field(fieldNameForText(), trimText(text)); - } - } - - // send URL as field? - if (StringUtils.isNotBlank(fieldNameForURL())) { - builder.field(fieldNameForURL(), normalisedurl); - } - - // which metadata to display? 
- Map keyVals = filterMetadata(metadata); - - for (String fieldName : keyVals.keySet()) { - String[] values = keyVals.get(fieldName); - if (values.length == 1) { - if (!ignoreEmptyFields() || StringUtils.isNotBlank(values[0])) { - builder.field(fieldName, values[0]); - } - } else if (values.length > 1) { - builder.array(fieldName, values); - } - } - - builder.endObject(); - - IndexRequest indexRequest = - new IndexRequest(getIndexName(metadata)).source(builder).id(docID); - - DocWriteRequest.OpType optype = DocWriteRequest.OpType.INDEX; - - if (create) { - optype = DocWriteRequest.OpType.CREATE; - } - - indexRequest.opType(optype); - - if (pipeline != null) { - indexRequest.setPipeline(pipeline); - } - - connection.addToProcessor(indexRequest); - - eventCounter.scope("Indexed").incrBy(1); - perSecMetrics.scope("Indexed").update(1); - - waitAckLock.lock(); - try { - List tt = waitAck.getIfPresent(docID); - if (tt == null) { - tt = new LinkedList<>(); - waitAck.put(docID, tt); - } - tt.add(tuple); - LOG.debug("Added to waitAck {} with ID {} total {}", url, docID, tt.size()); - } finally { - waitAckLock.unlock(); - } - } catch (IOException e) { - LOG.error("Error building document for ES", e); - // do not send to status stream so that it gets replayed - _collector.fail(tuple); - - waitAckLock.lock(); - try { - waitAck.invalidate(docID); - } finally { - waitAckLock.unlock(); - } - } - } - - /** - * Must be overridden for implementing custom index names based on some metadata information By - * Default, indexName coming from config is used - */ - protected String getIndexName(Metadata m) { - return indexName; - } - - @Override - public void beforeBulk(long executionId, BulkRequest request) { - eventCounter.scope("bulks_sent").incrBy(1); - } - - @Override - public void afterBulk(long executionId, BulkRequest request, BulkResponse response) { - eventCounter.scope("bulks_received").incrBy(1); - eventCounter.scope("bulk_msec").incrBy(response.getTook().getMillis()); - - var idsToBulkItemsWithFailedFlag = - Arrays.stream(response.getItems()) - .map( - bir -> { - String id = bir.getId(); - BulkItemResponse.Failure f = bir.getFailure(); - boolean failed = false; - if (f != null) { - if (f.getStatus().equals(RestStatus.CONFLICT)) { - eventCounter.scope("doc_conflicts").incrBy(1); - LOG.debug("Doc conflict ID {}", id); - } else { - failed = true; - } - } - return new BulkItemResponseToFailedFlag(bir, failed); - }) - .collect( - // https://github.com/DigitalPebble/storm-crawler/issues/832 - Collectors.groupingBy( - idWithFailedFlagTuple -> idWithFailedFlagTuple.id, - Collectors.toUnmodifiableList())); - - Map> presentTuples; - long estimatedSize; - Set debugInfo = null; - waitAckLock.lock(); - try { - presentTuples = waitAck.getAllPresent(idsToBulkItemsWithFailedFlag.keySet()); - if (!presentTuples.isEmpty()) { - waitAck.invalidateAll(presentTuples.keySet()); - } - estimatedSize = waitAck.estimatedSize(); - // Only if we have to. 
- if (LOG.isDebugEnabled() && estimatedSize > 0L) { - debugInfo = new HashSet<>(waitAck.asMap().keySet()); - } - } finally { - waitAckLock.unlock(); - } - - int ackCount = 0; - int failureCount = 0; - - for (var entry : presentTuples.entrySet()) { - final var id = entry.getKey(); - final var associatedTuple = entry.getValue(); - final var bulkItemsWithFailedFlag = idsToBulkItemsWithFailedFlag.get(id); - - BulkItemResponseToFailedFlag selected; - - if (bulkItemsWithFailedFlag.size() == 1) { - selected = bulkItemsWithFailedFlag.get(0); - } else { - // Fallback if there are multiple responses for the same id - BulkItemResponseToFailedFlag tmp = null; - var ctFailed = 0; - for (var buwff : bulkItemsWithFailedFlag) { - if (tmp == null) { - tmp = buwff; - } - if (buwff.failed) ctFailed++; - else tmp = buwff; - } - if (ctFailed != bulkItemsWithFailedFlag.size()) { - LOG.warn( - "The id {} would result in an ack and a failure. Using only the ack for processing.", - id); - } - selected = Objects.requireNonNull(tmp); - } - - if (associatedTuple != null) { - LOG.debug("Found {} tuple(s) for ID {}", associatedTuple.size(), id); - for (Tuple t : associatedTuple) { - String url = (String) t.getValueByField("url"); - - Metadata metadata = (Metadata) t.getValueByField("metadata"); - - if (!selected.failed) { - ackCount++; - _collector.emit( - StatusStreamName, t, new Values(url, metadata, Status.FETCHED)); - _collector.ack(t); - } else { - failureCount++; - var failure = selected.getFailure(); - LOG.error("update ID {}, URL {}, failure: {}", id, url, failure); - // there is something wrong with the content we should - // treat - // it as an ERROR - if (selected.getFailure().getStatus().equals(RestStatus.BAD_REQUEST)) { - metadata.setValue(Constants.STATUS_ERROR_SOURCE, "ES indexing"); - metadata.setValue(Constants.STATUS_ERROR_MESSAGE, "invalid content"); - _collector.emit( - StatusStreamName, t, new Values(url, metadata, Status.ERROR)); - _collector.ack(t); - LOG.debug("Acked {} with ID {}", url, id); - } else { - LOG.error("update ID {}, URL {}, failure: {}", id, url, failure); - // there is something wrong with the content we - // should - // treat - // it as an ERROR - if (failure.getStatus().equals(RestStatus.BAD_REQUEST)) { - metadata.setValue(Constants.STATUS_ERROR_SOURCE, "ES indexing"); - metadata.setValue( - Constants.STATUS_ERROR_MESSAGE, "invalid content"); - _collector.emit( - StatusStreamName, - t, - new Values(url, metadata, Status.ERROR)); - _collector.ack(t); - } - // otherwise just fail it - else { - _collector.fail(t); - } - } - } - } - } else { - LOG.warn("Could not find unacked tuples for {}", entry.getKey()); - } - } - - LOG.info( - "Bulk response [{}] : items {}, waitAck {}, acked {}, failed {}", - executionId, - idsToBulkItemsWithFailedFlag.size(), - estimatedSize, - ackCount, - failureCount); - if (debugInfo != null) { - for (String kinaw : debugInfo) { - LOG.debug("Still in wait ack after bulk response [{}] => {}", executionId, kinaw); - } - } - } - - @Override - public void afterBulk(long executionId, BulkRequest request, Throwable failure) { - eventCounter.scope("bulks_received").incrBy(1); - LOG.error("Exception with bulk {} - failing the whole lot ", executionId, failure); - - final var failedIds = - request.requests().stream() - .map(DocWriteRequest::id) - .collect(Collectors.toUnmodifiableSet()); - waitAckLock.lock(); - Map> failedTupleLists; - try { - failedTupleLists = waitAck.getAllPresent(failedIds); - if (!failedTupleLists.isEmpty()) { - 
waitAck.invalidateAll(failedTupleLists.keySet()); - } - } finally { - waitAckLock.unlock(); - } - - for (var id : failedIds) { - var failedTuples = failedTupleLists.get(id); - if (failedTuples != null) { - LOG.debug("Failed {} tuple(s) for ID {}", failedTuples.size(), id); - for (Tuple x : failedTuples) { - // fail it - eventCounter.scope("failed").incrBy(1); - _collector.fail(x); - } - } else { - LOG.warn("Could not find unacked tuple for {}", id); - } - } - } -} diff --git a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/filtering/JSONURLFilterWrapper.java b/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/filtering/JSONURLFilterWrapper.java deleted file mode 100644 index 068875ecf..000000000 --- a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/filtering/JSONURLFilterWrapper.java +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to you under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.stormcrawler.elasticsearch.filtering; - -import com.fasterxml.jackson.databind.JsonNode; -import java.io.ByteArrayInputStream; -import java.net.URL; -import java.util.Map; -import java.util.Timer; -import java.util.TimerTask; -import org.apache.stormcrawler.JSONResource; -import org.apache.stormcrawler.Metadata; -import org.apache.stormcrawler.elasticsearch.ElasticSearchConnection; -import org.apache.stormcrawler.filtering.URLFilter; -import org.elasticsearch.action.get.GetRequest; -import org.elasticsearch.action.get.GetResponse; -import org.elasticsearch.client.RequestOptions; -import org.elasticsearch.client.RestHighLevelClient; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Wraps a URLFilter whose resources are in a JSON file that can be stored in ES. The benefit of - * doing this is that the resources can be refreshed automatically and modified without having to - * recompile the jar and restart the topology. The connection to ES is done via the config and uses - * a new bolt type 'config'. - * - *

The configuration of the delegate is done in the urlfilters.json as usual. - * - *

- *  {
- *     "class": "org.apache.stormcrawler.elasticsearch.filtering.JSONURLFilterWrapper",
- *     "name": "ESFastURLFilter",
- *     "params": {
- *         "refresh": "60",
- *         "delegate": {
- *             "class": "org.apache.stormcrawler.filtering.regex.FastURLFilter",
- *             "params": {
- *                 "file": "fast.urlfilter.json"
- *             }
- *         }
- *     }
- *  }
- * 
- * - * The resource file can be pushed to ES with - * - *
- *  curl -XPUT 'localhost:9200/config/config/fast.urlfilter.json?pretty' -H 'Content-Type: application/json' -d @fast.urlfilter.json
- * 
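Since the wrapper opens its own connection under the bolt type `config`, the topology configuration also needs the matching `es.config.*` keys. A minimal sketch with placeholder values; only the key pattern (`es.<boltType>.addresses`, `.user`, `.password`) comes from `ElasticSearchConnection.getClient`:

```java
import java.util.HashMap;
import java.util.Map;

public class ConfigBoltTypeSettingsSketch {
    // Illustrative settings for the "config" bolt type used by the wrapper; adjust the
    // address and credentials to the actual cluster.
    public static Map<String, Object> configConnectionSettings() {
        Map<String, Object> conf = new HashMap<>();
        conf.put("es.config.addresses", "localhost:9200");
        conf.put("es.config.user", "elastic");      // optional basic auth
        conf.put("es.config.password", "changeme"); // optional basic auth
        return conf;
    }
}
```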
- */ -public class JSONURLFilterWrapper extends URLFilter { - - private static final Logger LOG = LoggerFactory.getLogger(JSONURLFilterWrapper.class); - - private URLFilter delegatedURLFilter; - - public void configure(@NotNull Map stormConf, @NotNull JsonNode filterParams) { - - String urlfilterclass = null; - - JsonNode delegateNode = filterParams.get("delegate"); - if (delegateNode == null) { - throw new RuntimeException("delegateNode undefined!"); - } - - JsonNode node = delegateNode.get("class"); - if (node != null && node.isTextual()) { - urlfilterclass = node.asText(); - } - - if (urlfilterclass == null) { - throw new RuntimeException("urlfilter.class undefined!"); - } - - // load an instance of the delegated parsefilter - try { - Class filterClass = Class.forName(urlfilterclass); - - boolean subClassOK = URLFilter.class.isAssignableFrom(filterClass); - if (!subClassOK) { - throw new RuntimeException( - "Filter " + urlfilterclass + " does not extend URLFilter"); - } - - delegatedURLFilter = (URLFilter) filterClass.newInstance(); - - // check that it implements JSONResource - if (!JSONResource.class.isInstance(delegatedURLFilter)) { - throw new RuntimeException( - "Filter " + urlfilterclass + " does not implement JSONResource"); - } - - } catch (Exception e) { - LOG.error("Can't setup {}: {}", urlfilterclass, e); - throw new RuntimeException("Can't setup " + urlfilterclass, e); - } - - // configure it - node = delegateNode.get("params"); - - delegatedURLFilter.configure(stormConf, node); - - int refreshRate = 600; - - node = filterParams.get("refresh"); - if (node != null && node.isInt()) { - refreshRate = node.asInt(refreshRate); - } - - final JSONResource resource = (JSONResource) delegatedURLFilter; - - new Timer() - .schedule( - new TimerTask() { - private RestHighLevelClient esClient; - - public void run() { - if (esClient == null) { - try { - esClient = - ElasticSearchConnection.getClient( - stormConf, "config"); - } catch (Exception e) { - LOG.error("Exception while creating ES connection", e); - } - } - if (esClient != null) { - LOG.info("Reloading json resources from ES"); - try { - GetResponse response = - esClient.get( - new GetRequest( - "config", - "config", - resource.getResourceFile()), - RequestOptions.DEFAULT); - resource.loadJSONResources( - new ByteArrayInputStream( - response.getSourceAsBytes())); - } catch (Exception e) { - LOG.error("Can't load config from ES", e); - } - } - } - }, - 0, - refreshRate * 1000); - } - - @Override - public @Nullable String filter( - @Nullable URL sourceUrl, - @Nullable Metadata sourceMetadata, - @NotNull String urlToFilter) { - return delegatedURLFilter.filter(sourceUrl, sourceMetadata, urlToFilter); - } -} diff --git a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/metrics/MetricsConsumer.java b/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/metrics/MetricsConsumer.java deleted file mode 100644 index 5f2270420..000000000 --- a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/metrics/MetricsConsumer.java +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to you under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.stormcrawler.elasticsearch.metrics; - -import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder; - -import java.text.SimpleDateFormat; -import java.util.Collection; -import java.util.Date; -import java.util.Iterator; -import java.util.Map; -import java.util.Map.Entry; -import org.apache.storm.metric.api.IMetricsConsumer; -import org.apache.storm.task.IErrorReporter; -import org.apache.storm.task.TopologyContext; -import org.apache.stormcrawler.elasticsearch.ElasticSearchConnection; -import org.apache.stormcrawler.util.ConfUtils; -import org.elasticsearch.action.index.IndexRequest; -import org.elasticsearch.xcontent.XContentBuilder; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Sends metrics to an Elasticsearch index. The ES details are set in the configuration; an optional - * argument sets a date format to append to the index name. - * - *
- *   topology.metrics.consumer.register:
- *        - class: "org.apache.stormcrawler.elasticsearch.metrics.MetricsConsumer"
- *          parallelism.hint: 1
- *          argument: "yyyy-MM-dd"
- * 
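The same registration can be done programmatically; the sketch below mirrors the YAML above and is only illustrative. The optional argument is parsed as a `SimpleDateFormat` and appended to the index name, which yields one metrics index per day for `yyyy-MM-dd`.

```java
import org.apache.storm.Config;
import org.apache.stormcrawler.elasticsearch.metrics.MetricsConsumer;

public class MetricsConsumerRegistrationSketch {
    // Equivalent of the YAML registration: consumer class, parallelism hint of 1, and the
    // date-format argument used to suffix the metrics index name.
    public static void register(Config conf) {
        conf.registerMetricsConsumer(MetricsConsumer.class, "yyyy-MM-dd", 1);
    }
}
```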
- */ -public class MetricsConsumer implements IMetricsConsumer { - - private final Logger LOG = LoggerFactory.getLogger(getClass()); - - private static final String ESBoltType = "metrics"; - - /** name of the index to use for the metrics (default : metrics) * */ - private static final String ESMetricsIndexNameParamName = "es." + ESBoltType + ".index.name"; - - private String indexName; - - private ElasticSearchConnection connection; - - private String stormID; - - /** optional date format passed as argument, must be parsable as a SimpleDateFormat */ - private SimpleDateFormat dateFormat; - - @Override - public void prepare( - Map stormConf, - Object registrationArgument, - TopologyContext context, - IErrorReporter errorReporter) { - indexName = ConfUtils.getString(stormConf, ESMetricsIndexNameParamName, "metrics"); - stormID = context.getStormId(); - if (registrationArgument != null) { - dateFormat = new SimpleDateFormat((String) registrationArgument); - LOG.info("Using date format {}", registrationArgument); - } - try { - connection = ElasticSearchConnection.getConnection(stormConf, ESBoltType); - } catch (Exception e1) { - LOG.error("Can't connect to ElasticSearch", e1); - throw new RuntimeException(e1); - } - } - - @Override - public void cleanup() { - if (connection != null) connection.close(); - } - - @Override - public void handleDataPoints(TaskInfo taskInfo, Collection dataPoints) { - final Date now = new Date(); - for (DataPoint dataPoint : dataPoints) { - handleDataPoints(taskInfo, dataPoint.name, dataPoint.value, now); - } - } - - private void handleDataPoints( - final TaskInfo taskInfo, final String nameprefix, final Object value, final Date now) { - if (value instanceof Number) { - indexDataPoint(taskInfo, now, nameprefix, ((Number) value).doubleValue()); - } else if (value instanceof Map) { - Iterator keyValiter = ((Map) value).entrySet().iterator(); - while (keyValiter.hasNext()) { - Entry entry = keyValiter.next(); - String newnameprefix = nameprefix + "." + entry.getKey(); - handleDataPoints(taskInfo, newnameprefix, entry.getValue(), now); - } - } else if (value instanceof Collection) { - for (Object collectionObj : (Collection) value) { - handleDataPoints(taskInfo, nameprefix, collectionObj, now); - } - } else { - LOG.warn("Found data point value {} of {}", nameprefix, value.getClass().toString()); - } - } - - /** - * Returns the name of the index that metrics will be written to. 
- * - * @return elastic index name - */ - private String getIndexName(Date timestamp) { - if (dateFormat == null) return indexName; - - StringBuilder sb = new StringBuilder(indexName); - sb.append("-").append(dateFormat.format(timestamp)); - return sb.toString(); - } - - private void indexDataPoint(TaskInfo taskInfo, Date timestamp, String name, double value) { - try { - XContentBuilder builder = jsonBuilder().startObject(); - builder.field("stormId", stormID); - builder.field("srcComponentId", taskInfo.srcComponentId); - builder.field("srcTaskId", taskInfo.srcTaskId); - builder.field("srcWorkerHost", taskInfo.srcWorkerHost); - builder.field("srcWorkerPort", taskInfo.srcWorkerPort); - builder.field("name", name); - builder.field("value", value); - builder.field("timestamp", timestamp); - builder.endObject(); - - IndexRequest indexRequest = new IndexRequest(getIndexName(timestamp)).source(builder); - connection.addToProcessor(indexRequest); - } catch (Exception e) { - LOG.error("problem when building request for ES", e); - } - } -} diff --git a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/metrics/StatusMetricsBolt.java b/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/metrics/StatusMetricsBolt.java deleted file mode 100644 index eeda63026..000000000 --- a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/metrics/StatusMetricsBolt.java +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to you under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.stormcrawler.elasticsearch.metrics; - -import java.util.HashMap; -import java.util.Map; -import org.apache.storm.Config; -import org.apache.storm.task.OutputCollector; -import org.apache.storm.task.TopologyContext; -import org.apache.storm.topology.OutputFieldsDeclarer; -import org.apache.storm.topology.base.BaseRichBolt; -import org.apache.storm.tuple.Tuple; -import org.apache.storm.utils.TupleUtils; -import org.apache.stormcrawler.elasticsearch.ElasticSearchConnection; -import org.apache.stormcrawler.util.ConfUtils; -import org.elasticsearch.action.ActionListener; -import org.elasticsearch.client.RequestOptions; -import org.elasticsearch.client.core.CountRequest; -import org.elasticsearch.client.core.CountResponse; -import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.search.builder.SearchSourceBuilder; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Queries the status index periodically to get the count of URLs per status. This bolt can be - * connected to the output of any other bolt and will not produce anything as output. 
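Because the bolt only reacts to its own tick tuples, it simply needs to be attached to some upstream component. A wiring sketch based on ESCrawlTopology earlier in this diff (component names and parallelism are the archetype's):

```java
import org.apache.storm.topology.TopologyBuilder;
import org.apache.stormcrawler.elasticsearch.metrics.StatusMetricsBolt;
import org.apache.stormcrawler.elasticsearch.persistence.AggregationSpout;

public class StatusMetricsWiringSketch {
    // The bolt queries the status index on every tick and exposes the per-status counts
    // as a "status.count" metric; the grouping used to reach it does not matter.
    public static void wire(TopologyBuilder builder) {
        builder.setSpout("spout", new AggregationSpout(), 1);
        builder.setBolt("status_metrics", new StatusMetricsBolt()).shuffleGrouping("spout");
    }
}
```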
- */ -public class StatusMetricsBolt extends BaseRichBolt { - - private static final Logger LOG = LoggerFactory.getLogger(StatusMetricsBolt.class); - - private static final String ESBoltType = "status"; - private static final String ESStatusIndexNameParamName = "es.status.index.name"; - - private String indexName; - - private ElasticSearchConnection connection; - - private Map latestStatusCounts = new HashMap<>(6); - - private int freqStats = 60; - - private OutputCollector _collector; - - private transient StatusActionListener[] listeners; - - private class StatusActionListener implements ActionListener { - - private final String name; - - private boolean ready = true; - - public boolean isReady() { - return ready; - } - - public void busy() { - this.ready = false; - } - - StatusActionListener(String statusName) { - name = statusName; - } - - @Override - public void onResponse(CountResponse response) { - ready = true; - LOG.debug("Got {} counts for status:{}", response.getCount(), name); - latestStatusCounts.put(name, response.getCount()); - } - - @Override - public void onFailure(Exception e) { - ready = true; - LOG.error("Failure when getting counts for status:{}", name, e); - } - } - - @Override - public void prepare( - Map stormConf, TopologyContext context, OutputCollector collector) { - _collector = collector; - indexName = ConfUtils.getString(stormConf, ESStatusIndexNameParamName, "status"); - try { - connection = ElasticSearchConnection.getConnection(stormConf, ESBoltType); - } catch (Exception e1) { - LOG.error("Can't connect to ElasticSearch", e1); - throw new RuntimeException(e1); - } - - context.registerMetric( - "status.count", - () -> { - return latestStatusCounts; - }, - freqStats); - - listeners = new StatusActionListener[6]; - - listeners[0] = new StatusActionListener("DISCOVERED"); - listeners[1] = new StatusActionListener("FETCHED"); - listeners[2] = new StatusActionListener("FETCH_ERROR"); - listeners[3] = new StatusActionListener("REDIRECTION"); - listeners[4] = new StatusActionListener("ERROR"); - listeners[5] = new StatusActionListener("TOTAL"); - } - - @Override - public Map getComponentConfiguration() { - Config conf = new Config(); - conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, freqStats); - return conf; - } - - @Override - public void execute(Tuple input) { - _collector.ack(input); - - // this bolt can be connected to anything - // we just want to trigger a new search when the input is a tick tuple - if (!TupleUtils.isTick(input)) { - return; - } - - for (StatusActionListener listener : listeners) { - // still waiting for results from previous request - if (!listener.isReady()) { - LOG.debug("Not ready to get counts for status {}", listener.name); - continue; - } - CountRequest request = new CountRequest(indexName); - if (!listener.name.equalsIgnoreCase("TOTAL")) { - SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); - sourceBuilder.query(QueryBuilders.termQuery("status", listener.name)); - request.source(sourceBuilder); - } - listener.busy(); - connection.getClient().countAsync(request, RequestOptions.DEFAULT, listener); - } - } - - @Override - public void cleanup() { - connection.close(); - } - - @Override - public void declareOutputFields(OutputFieldsDeclarer declarer) { - // NONE - THIS BOLT DOES NOT GET CONNECTED TO ANY OTHERS - } -} diff --git a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/parse/filter/JSONResourceWrapper.java 
b/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/parse/filter/JSONResourceWrapper.java deleted file mode 100644 index 5b34830aa..000000000 --- a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/parse/filter/JSONResourceWrapper.java +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to you under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.stormcrawler.elasticsearch.parse.filter; - -import com.fasterxml.jackson.databind.JsonNode; -import java.io.ByteArrayInputStream; -import java.util.Map; -import java.util.Timer; -import java.util.TimerTask; -import org.apache.stormcrawler.JSONResource; -import org.apache.stormcrawler.elasticsearch.ElasticSearchConnection; -import org.apache.stormcrawler.parse.ParseFilter; -import org.apache.stormcrawler.parse.ParseResult; -import org.elasticsearch.action.get.GetRequest; -import org.elasticsearch.action.get.GetResponse; -import org.elasticsearch.client.RequestOptions; -import org.elasticsearch.client.RestHighLevelClient; -import org.jetbrains.annotations.NotNull; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.w3c.dom.DocumentFragment; - -/** - * Wraps a ParseFilter whose resources are in a JSON file that can be stored in ES. The benefit of - * doing this is that the resources can be refreshed automatically and modified without having to - * recompile the jar and restart the topology. The connection to ES is done via the config and uses - * a new bolt type 'config'. - * - *

- * The configuration of the delegate is done in the parsefilters.json as usual.
- *

- *  {
- *     "class": "org.apache.stormcrawler.elasticsearch.parse.filter.JSONResourceWrapper",
- *     "name": "ESCollectionTagger",
- *     "params": {
- *         "refresh": "60",
- *         "delegate": {
- *             "class": "org.apache.stormcrawler.parse.filter.CollectionTagger",
- *             "params": {
- *                 "file": "collections.json"
- *             }
- *         }
- *     }
- *  }
- * 
- *
- * The resource file can be pushed to ES with
- *
- *  curl -XPUT "$ESHOST/config/_create/collections.json" -H 'Content-Type: application/json' -d @src/main/resources/collections.json
- * 
- */ -public class JSONResourceWrapper extends ParseFilter { - - private static final Logger LOG = LoggerFactory.getLogger(JSONResourceWrapper.class); - - private ParseFilter delegatedParseFilter; - - public void configure(@NotNull Map stormConf, @NotNull JsonNode filterParams) { - - String parsefilterclass = null; - - JsonNode delegateNode = filterParams.get("delegate"); - if (delegateNode == null) { - throw new RuntimeException("delegateNode undefined!"); - } - - JsonNode node = delegateNode.get("class"); - if (node != null && node.isTextual()) { - parsefilterclass = node.asText(); - } - - if (parsefilterclass == null) { - throw new RuntimeException("parsefilter.class undefined!"); - } - - // load an instance of the delegated parsefilter - try { - Class filterClass = Class.forName(parsefilterclass); - - boolean subClassOK = ParseFilter.class.isAssignableFrom(filterClass); - if (!subClassOK) { - throw new RuntimeException( - "Filter " + parsefilterclass + " does not extend ParseFilter"); - } - - delegatedParseFilter = (ParseFilter) filterClass.newInstance(); - - // check that it implements JSONResource - if (!JSONResource.class.isInstance(delegatedParseFilter)) { - throw new RuntimeException( - "Filter " + parsefilterclass + " does not implement JSONResource"); - } - - } catch (Exception e) { - LOG.error("Can't setup {}: {}", parsefilterclass, e); - throw new RuntimeException("Can't setup " + parsefilterclass, e); - } - - // configure it - node = delegateNode.get("params"); - - delegatedParseFilter.configure(stormConf, node); - - int refreshRate = 600; - - node = filterParams.get("refresh"); - if (node != null && node.isInt()) { - refreshRate = node.asInt(refreshRate); - } - - final JSONResource resource = (JSONResource) delegatedParseFilter; - - new Timer() - .schedule( - new TimerTask() { - private RestHighLevelClient esClient; - - public void run() { - if (esClient == null) { - try { - esClient = - ElasticSearchConnection.getClient( - stormConf, "config"); - } catch (Exception e) { - LOG.error("Exception while creating ES connection", e); - } - } - if (esClient != null) { - LOG.info("Reloading json resources from ES"); - try { - GetResponse response = - esClient.get( - new GetRequest( - "config", - resource.getResourceFile()), - RequestOptions.DEFAULT); - resource.loadJSONResources( - new ByteArrayInputStream( - response.getSourceAsBytes())); - } catch (Exception e) { - LOG.error("Can't load config from ES", e); - } - } - } - }, - 0, - refreshRate * 1000); - } - - @Override - public void filter(String URL, byte[] content, DocumentFragment doc, ParseResult parse) { - delegatedParseFilter.filter(URL, content, doc, parse); - } -} diff --git a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/persistence/AbstractSpout.java b/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/persistence/AbstractSpout.java deleted file mode 100644 index ec5b32c2e..000000000 --- a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/persistence/AbstractSpout.java +++ /dev/null @@ -1,225 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to you under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.stormcrawler.elasticsearch.persistence; - -import java.io.IOException; -import java.util.Date; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import org.apache.storm.spout.SpoutOutputCollector; -import org.apache.storm.task.TopologyContext; -import org.apache.stormcrawler.Metadata; -import org.apache.stormcrawler.elasticsearch.ElasticSearchConnection; -import org.apache.stormcrawler.persistence.AbstractQueryingSpout; -import org.apache.stormcrawler.util.ConfUtils; -import org.elasticsearch.client.RestHighLevelClient; -import org.elasticsearch.search.SearchHit; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public abstract class AbstractSpout extends AbstractQueryingSpout { - - private static final Logger LOG = LoggerFactory.getLogger(AbstractSpout.class); - - protected static final String ESBoltType = "status"; - protected static final String ESStatusIndexNameParamName = "es.status.index.name"; - - /** Field name to use for aggregating * */ - protected static final String ESStatusBucketFieldParamName = "es.status.bucket.field"; - - protected static final String ESStatusMaxBucketParamName = "es.status.max.buckets"; - protected static final String ESStatusMaxURLsParamName = "es.status.max.urls.per.bucket"; - - /** Field name to use for sorting the URLs within a bucket, not used if empty or null. */ - protected static final String ESStatusBucketSortFieldParamName = "es.status.bucket.sort.field"; - - /** Field name to use for sorting the buckets, not used if empty or null. */ - protected static final String ESStatusGlobalSortFieldParamName = "es.status.global.sort.field"; - - protected static final String ESStatusFilterParamName = "es.status.filterQuery"; - - protected static final String ESStatusQueryTimeoutParamName = "es.status.query.timeout"; - - /** Query to use as a positive filter, set by es.status.filterQuery */ - protected List filterQueries = null; - - protected String indexName; - - protected static RestHighLevelClient client; - - /** - * when using multiple instances - each one is in charge of a specific shard useful when - * sharding based on host or domain to guarantee a good mix of URLs - */ - protected int shardID = -1; - - /** Used to distinguish between instances in the logs * */ - protected String logIdprefix = ""; - - /** Field name used for field collapsing e.g. 
key * */ - protected String partitionField; - - protected int maxURLsPerBucket = 10; - - protected int maxBucketNum = 10; - - protected List bucketSortField = new LinkedList<>(); - - protected String totalSortField = ""; - - protected Date queryDate; - - protected int queryTimeout = -1; - - @Override - public void open( - Map stormConf, - TopologyContext context, - SpoutOutputCollector collector) { - - super.open(stormConf, context, collector); - - indexName = ConfUtils.getString(stormConf, ESStatusIndexNameParamName, "status"); - - // one ES client per JVM - synchronized (AbstractSpout.class) { - try { - if (client == null) { - client = ElasticSearchConnection.getClient(stormConf, ESBoltType); - } - } catch (Exception e1) { - LOG.error("Can't connect to ElasticSearch", e1); - throw new RuntimeException(e1); - } - } - - // if more than one instance is used we expect their number to be the - // same as the number of shards - int totalTasks = context.getComponentTasks(context.getThisComponentId()).size(); - if (totalTasks > 1) { - logIdprefix = - "[" + context.getThisComponentId() + " #" + context.getThisTaskIndex() + "] "; - - // determine the number of shards so that we can restrict the - // search - - // TODO use the admin API when it gets available - // TODO or the low level one with - // https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-shards-stores.html - // TODO identify local shards and use those if possible - - // ClusterSearchShardsRequest request = new - // ClusterSearchShardsRequest( - // indexName); - // ClusterSearchShardsResponse shardresponse = client.admin() - // .cluster().searchShards(request).actionGet(); - // ClusterSearchShardsGroup[] shardgroups = - // shardresponse.getGroups(); - // if (totalTasks != shardgroups.length) { - // throw new RuntimeException( - // "Number of ES spout instances should be the same as number of - // shards (" - // + shardgroups.length + ") but is " + totalTasks); - // } - // shardID = shardgroups[context.getThisTaskIndex()].getShardId() - // .getId(); - - // TEMPORARY simply use the task index as shard index - shardID = context.getThisTaskIndex(); - LOG.info("{} assigned shard ID {}", logIdprefix, shardID); - } - - partitionField = ConfUtils.getString(stormConf, ESStatusBucketFieldParamName, "key"); - - bucketSortField = ConfUtils.loadListFromConf(ESStatusBucketSortFieldParamName, stormConf); - - totalSortField = ConfUtils.getString(stormConf, ESStatusGlobalSortFieldParamName); - - maxURLsPerBucket = ConfUtils.getInt(stormConf, ESStatusMaxURLsParamName, 1); - maxBucketNum = ConfUtils.getInt(stormConf, ESStatusMaxBucketParamName, 10); - - queryTimeout = ConfUtils.getInt(stormConf, ESStatusQueryTimeoutParamName, -1); - - filterQueries = ConfUtils.loadListFromConf(ESStatusFilterParamName, stormConf); - } - - /** Builds a query and use it retrieve the results from ES * */ - protected abstract void populateBuffer(); - - protected final boolean addHitToBuffer(SearchHit hit) { - Map keyValues = hit.getSourceAsMap(); - String url = (String) keyValues.get("url"); - // is already being processed - skip it! 
- if (beingProcessed.containsKey(url)) { - return false; - } - Metadata metadata = fromKeyValues(keyValues); - addHitInfoToMetadata(metadata, hit); - - return buffer.add(url, metadata); - } - - protected void addHitInfoToMetadata(Metadata metadata, SearchHit hit) {} - - protected final Metadata fromKeyValues(Map keyValues) { - Map> mdAsMap = (Map>) keyValues.get("metadata"); - Metadata metadata = new Metadata(); - if (mdAsMap != null) { - Iterator>> mdIter = mdAsMap.entrySet().iterator(); - while (mdIter.hasNext()) { - Entry> mdEntry = mdIter.next(); - String key = mdEntry.getKey(); - // periods are not allowed in ES2 - replace with %2E - key = key.replaceAll("%2E", "\\."); - Object mdValObj = mdEntry.getValue(); - // single value - if (mdValObj instanceof String) { - metadata.addValue(key, (String) mdValObj); - } - // multi valued - else { - metadata.addValues(key, (List) mdValObj); - } - } - } - return metadata; - } - - @Override - public void ack(Object msgId) { - LOG.debug("{} Ack for {}", logIdprefix, msgId); - super.ack(msgId); - } - - @Override - public void fail(Object msgId) { - LOG.info("{} Fail for {}", logIdprefix, msgId); - super.fail(msgId); - } - - @Override - public void close() { - if (client != null) - try { - client.close(); - } catch (IOException e) { - } - } -} diff --git a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/persistence/AggregationSpout.java b/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/persistence/AggregationSpout.java deleted file mode 100644 index 0e1f69dae..000000000 --- a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/persistence/AggregationSpout.java +++ /dev/null @@ -1,364 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to you under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.stormcrawler.elasticsearch.persistence; - -import static org.elasticsearch.index.query.QueryBuilders.boolQuery; - -import java.time.Instant; -import java.util.Calendar; -import java.util.Date; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.TimeUnit; -import org.apache.commons.lang.StringUtils; -import org.apache.storm.spout.SpoutOutputCollector; -import org.apache.storm.task.TopologyContext; -import org.apache.stormcrawler.Metadata; -import org.apache.stormcrawler.util.ConfUtils; -import org.elasticsearch.action.ActionListener; -import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.client.RequestOptions; -import org.elasticsearch.core.TimeValue; -import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.search.SearchHit; -import org.elasticsearch.search.aggregations.AggregationBuilders; -import org.elasticsearch.search.aggregations.Aggregations; -import org.elasticsearch.search.aggregations.BucketOrder; -import org.elasticsearch.search.aggregations.bucket.SingleBucketAggregation; -import org.elasticsearch.search.aggregations.bucket.sampler.DiversifiedAggregationBuilder; -import org.elasticsearch.search.aggregations.bucket.terms.Terms; -import org.elasticsearch.search.aggregations.bucket.terms.Terms.Bucket; -import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder; -import org.elasticsearch.search.aggregations.metrics.TopHits; -import org.elasticsearch.search.builder.SearchSourceBuilder; -import org.elasticsearch.search.sort.FieldSortBuilder; -import org.elasticsearch.search.sort.SortBuilders; -import org.elasticsearch.search.sort.SortOrder; -import org.joda.time.format.ISODateTimeFormat; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Spout which pulls URL from an ES index. Use a single instance unless you use 'es.status.routing' - * with the StatusUpdaterBolt, in which case you need to have exactly the same number of spout - * instances as ES shards. Guarantees a good mix of URLs by aggregating them by an arbitrary field - * e.g. key. 
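A configuration sketch for the aggregation behaviour described above, using the `es.status.*` keys defined in `AbstractSpout`; the values are arbitrary examples, not recommendations.

```java
import org.apache.storm.Config;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.stormcrawler.elasticsearch.persistence.AggregationSpout;

public class AggregationSpoutSketch {
    public static void main(String[] args) {
        Config conf = new Config();
        conf.put("es.status.bucket.field", "key");     // field the URLs are grouped on
        conf.put("es.status.max.buckets", 10);         // buckets returned per query
        conf.put("es.status.max.urls.per.bucket", 2);  // URLs taken from each bucket

        TopologyBuilder builder = new TopologyBuilder();
        // a single instance, unless 'es.status.routing' is used by the status
        // updater, in which case parallelism must equal the number of shards
        builder.setSpout("spout", new AggregationSpout(), 1);
    }
}
```

With `es.status.bucket.field` left at its default of `key`, the URLs are grouped on the partition key written by the status updater, which is what provides the mix of hosts or domains mentioned above.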
- */ -public class AggregationSpout extends AbstractSpout implements ActionListener { - - private static final Logger LOG = LoggerFactory.getLogger(AggregationSpout.class); - - private static final String ESStatusSampleParamName = "es.status.sample"; - private static final String ESMostRecentDateIncreaseParamName = "es.status.recentDate.increase"; - private static final String ESMostRecentDateMinGapParamName = "es.status.recentDate.min.gap"; - - private boolean sample = false; - - private int recentDateIncrease = -1; - private int recentDateMinGap = -1; - - protected Set currentBuckets; - - @Override - public void open( - Map stormConf, - TopologyContext context, - SpoutOutputCollector collector) { - sample = ConfUtils.getBoolean(stormConf, ESStatusSampleParamName, sample); - recentDateIncrease = - ConfUtils.getInt(stormConf, ESMostRecentDateIncreaseParamName, recentDateIncrease); - recentDateMinGap = - ConfUtils.getInt(stormConf, ESMostRecentDateMinGapParamName, recentDateMinGap); - super.open(stormConf, context, collector); - currentBuckets = new HashSet<>(); - } - - @Override - protected void populateBuffer() { - - if (queryDate == null) { - queryDate = new Date(); - lastTimeResetToNOW = Instant.now(); - } - - String formattedQueryDate = ISODateTimeFormat.dateTimeNoMillis().print(queryDate.getTime()); - - LOG.info("{} Populating buffer with nextFetchDate <= {}", logIdprefix, formattedQueryDate); - - BoolQueryBuilder queryBuilder = - boolQuery() - .filter(QueryBuilders.rangeQuery("nextFetchDate").lte(formattedQueryDate)); - - if (filterQueries != null) { - for (String filterQuery : filterQueries) { - queryBuilder.filter(QueryBuilders.queryStringQuery(filterQuery)); - } - } - - SearchRequest request = new SearchRequest(indexName); - - SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); - sourceBuilder.query(queryBuilder); - sourceBuilder.from(0); - sourceBuilder.size(0); - sourceBuilder.explain(false); - sourceBuilder.trackTotalHits(false); - - if (queryTimeout != -1) { - sourceBuilder.timeout(new TimeValue(queryTimeout, TimeUnit.SECONDS)); - } - - TermsAggregationBuilder aggregations = - AggregationBuilders.terms("partition").field(partitionField).size(maxBucketNum); - - org.elasticsearch.search.aggregations.metrics.TopHitsAggregationBuilder tophits = - AggregationBuilders.topHits("docs").size(maxURLsPerBucket).explain(false); - - // sort within a bucket - for (String bsf : bucketSortField) { - FieldSortBuilder sorter = SortBuilders.fieldSort(bsf).order(SortOrder.ASC); - tophits.sort(sorter); - } - - aggregations.subAggregation(tophits); - - // sort between buckets - if (StringUtils.isNotBlank(totalSortField)) { - org.elasticsearch.search.aggregations.metrics.MinAggregationBuilder minBuilder = - AggregationBuilders.min("top_hit").field(totalSortField); - aggregations.subAggregation(minBuilder); - aggregations.order(BucketOrder.aggregation("top_hit", true)); - } - - if (sample) { - DiversifiedAggregationBuilder sab = new DiversifiedAggregationBuilder("sample"); - sab.field(partitionField).maxDocsPerValue(maxURLsPerBucket); - sab.shardSize(maxURLsPerBucket * maxBucketNum); - sab.subAggregation(aggregations); - sourceBuilder.aggregation(sab); - } else { - sourceBuilder.aggregation(aggregations); - } - - request.source(sourceBuilder); - - // https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-preference.html - // _shards:2,3 - // specific shard but ideally a local copy of it - if (shardID != -1) { - request.preference("_shards:" + shardID + "|_local"); - 
} - - // dump query to log - LOG.debug("{} ES query {}", logIdprefix, request); - - LOG.trace("{} isInquery set to true"); - isInQuery.set(true); - client.searchAsync(request, RequestOptions.DEFAULT, this); - } - - @Override - public void onFailure(Exception arg0) { - LOG.error("{} Exception with ES query", logIdprefix, arg0); - markQueryReceivedNow(); - } - - @Override - public void onResponse(SearchResponse response) { - long timeTaken = System.currentTimeMillis() - getTimeLastQuerySent(); - - Aggregations aggregs = response.getAggregations(); - - if (aggregs == null) { - markQueryReceivedNow(); - return; - } - - SingleBucketAggregation sample = aggregs.get("sample"); - if (sample != null) { - aggregs = sample.getAggregations(); - } - - Terms agg = aggregs.get("partition"); - - int numhits = 0; - int numBuckets = 0; - int alreadyprocessed = 0; - - Instant mostRecentDateFound = null; - - currentBuckets.clear(); - - // For each entry - Iterator iterator = (Iterator) agg.getBuckets().iterator(); - while (iterator.hasNext()) { - Terms.Bucket entry = iterator.next(); - String key = (String) entry.getKey(); // bucket key - - currentBuckets.add(key); - - long docCount = entry.getDocCount(); // Doc count - - int hitsForThisBucket = 0; - - SearchHit lastHit = null; - - // filter results so that we don't include URLs we are already - // being processed - TopHits topHits = entry.getAggregations().get("docs"); - for (SearchHit hit : topHits.getHits().getHits()) { - - LOG.debug( - "{} -> id [{}], _source [{}]", - logIdprefix, - hit.getId(), - hit.getSourceAsString()); - - hitsForThisBucket++; - - lastHit = hit; - - Map keyValues = hit.getSourceAsMap(); - String url = (String) keyValues.get("url"); - - // consider only the first document of the last bucket - // for optimising the nextFetchDate - if (hitsForThisBucket == 1 && !iterator.hasNext()) { - String strDate = (String) keyValues.get("nextFetchDate"); - try { - mostRecentDateFound = Instant.parse(strDate); - } catch (Exception e) { - throw new RuntimeException("can't parse date :" + strDate); - } - } - - // is already being processed or in buffer - skip it! - if (beingProcessed.containsKey(url)) { - LOG.debug("{} -> already processed: {}", logIdprefix, url); - alreadyprocessed++; - continue; - } - - Metadata metadata = fromKeyValues(keyValues); - boolean added = buffer.add(url, metadata); - if (!added) { - LOG.debug("{} -> already in buffer: {}", logIdprefix, url); - alreadyprocessed++; - continue; - } - LOG.debug("{} -> added to buffer : {}", logIdprefix, url); - } - - if (lastHit != null) { - sortValuesForKey(key, lastHit.getSortValues()); - } - - if (hitsForThisBucket > 0) numBuckets++; - - numhits += hitsForThisBucket; - - LOG.debug( - "{} key [{}], hits[{}], doc_count [{}]", - logIdprefix, - key, - hitsForThisBucket, - docCount, - alreadyprocessed); - } - - LOG.info( - "{} ES query returned {} hits from {} buckets in {} msec with {} already being processed. 
Took {} msec per doc on average.", - logIdprefix, - numhits, - numBuckets, - timeTaken, - alreadyprocessed, - ((float) timeTaken / numhits)); - - queryTimes.addMeasurement(timeTaken); - eventCounter.scope("already_being_processed").incrBy(alreadyprocessed); - eventCounter.scope("ES_queries").incrBy(1); - eventCounter.scope("ES_docs").incrBy(numhits); - - // optimise the nextFetchDate by getting the most recent value - // returned in the query and add to it, unless the previous value is - // within n mins in which case we'll keep it - if (mostRecentDateFound != null && recentDateIncrease >= 0) { - Calendar potentialNewDate = Calendar.getInstance(); - potentialNewDate.setTimeInMillis(mostRecentDateFound.toEpochMilli()); - potentialNewDate.add(Calendar.MINUTE, recentDateIncrease); - Date oldDate = null; - // check boundaries - if (this.recentDateMinGap > 0) { - Calendar low = Calendar.getInstance(); - low.setTime(queryDate); - low.add(Calendar.MINUTE, -recentDateMinGap); - Calendar high = Calendar.getInstance(); - high.setTime(queryDate); - high.add(Calendar.MINUTE, recentDateMinGap); - if (high.before(potentialNewDate) || low.after(potentialNewDate)) { - oldDate = queryDate; - } - } else { - oldDate = queryDate; - } - if (oldDate != null) { - queryDate = potentialNewDate.getTime(); - LOG.info( - "{} queryDate changed from {} to {} based on mostRecentDateFound {}", - logIdprefix, - oldDate, - queryDate, - mostRecentDateFound); - } else { - LOG.info( - "{} queryDate kept at {} based on mostRecentDateFound {}", - logIdprefix, - queryDate, - mostRecentDateFound); - } - } - - // reset the value for next fetch date if the previous one is too old - if (resetFetchDateAfterNSecs != -1) { - Instant changeNeededOn = - Instant.ofEpochMilli( - lastTimeResetToNOW.toEpochMilli() + (resetFetchDateAfterNSecs * 1000)); - if (Instant.now().isAfter(changeNeededOn)) { - LOG.info( - "{} queryDate set to null based on resetFetchDateAfterNSecs {}", - logIdprefix, - resetFetchDateAfterNSecs); - queryDate = null; - } - } - - // change the date if we don't get any results at all - if (numBuckets == 0) { - queryDate = null; - } - - // remove lock - markQueryReceivedNow(); - } - - protected void sortValuesForKey(String key, Object[] sortValues) {} -} diff --git a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/persistence/CollapsingSpout.java b/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/persistence/CollapsingSpout.java deleted file mode 100644 index badcf7aa2..000000000 --- a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/persistence/CollapsingSpout.java +++ /dev/null @@ -1,238 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to you under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.stormcrawler.elasticsearch.persistence; - -import static org.elasticsearch.index.query.QueryBuilders.boolQuery; - -import java.time.Instant; -import java.util.Date; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.concurrent.TimeUnit; -import org.apache.commons.lang.StringUtils; -import org.apache.storm.spout.SpoutOutputCollector; -import org.apache.storm.task.TopologyContext; -import org.apache.stormcrawler.util.ConfUtils; -import org.elasticsearch.action.ActionListener; -import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.client.RequestOptions; -import org.elasticsearch.core.TimeValue; -import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.InnerHitBuilder; -import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.search.SearchHit; -import org.elasticsearch.search.SearchHits; -import org.elasticsearch.search.builder.SearchSourceBuilder; -import org.elasticsearch.search.collapse.CollapseBuilder; -import org.elasticsearch.search.sort.FieldSortBuilder; -import org.elasticsearch.search.sort.SortBuilder; -import org.elasticsearch.search.sort.SortBuilders; -import org.elasticsearch.search.sort.SortOrder; -import org.joda.time.format.ISODateTimeFormat; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Spout which pulls URL from an ES index. Use a single instance unless you use 'es.status.routing' - * with the StatusUpdaterBolt, in which case you need to have exactly the same number of spout - * instances as ES shards. Collapses results to implement politeness and ensure a good diversity of - * sources. - */ -public class CollapsingSpout extends AbstractSpout implements ActionListener { - - private static final Logger LOG = LoggerFactory.getLogger(CollapsingSpout.class); - - /** Used to avoid deep paging * */ - private static final String ESMaxStartOffsetParamName = "es.status.max.start.offset"; - - private int lastStartOffset = 0; - private int maxStartOffset = -1; - - @Override - public void open( - Map stormConf, - TopologyContext context, - SpoutOutputCollector collector) { - maxStartOffset = ConfUtils.getInt(stormConf, ESMaxStartOffsetParamName, -1); - super.open(stormConf, context, collector); - } - - @Override - protected void populateBuffer() { - // not used yet or returned empty results - if (queryDate == null) { - queryDate = new Date(); - lastTimeResetToNOW = Instant.now(); - lastStartOffset = 0; - } - // been running same query for too long and paging deep? 
- else if (maxStartOffset != -1 && lastStartOffset > maxStartOffset) { - LOG.info("Reached max start offset {}", lastStartOffset); - lastStartOffset = 0; - } - - String formattedLastDate = ISODateTimeFormat.dateTimeNoMillis().print(queryDate.getTime()); - - LOG.info("{} Populating buffer with nextFetchDate <= {}", logIdprefix, formattedLastDate); - - BoolQueryBuilder queryBuilder = - boolQuery() - .filter(QueryBuilders.rangeQuery("nextFetchDate").lte(formattedLastDate)); - - if (filterQueries != null) { - for (String filterQuery : filterQueries) { - queryBuilder.filter(QueryBuilders.queryStringQuery(filterQuery)); - } - } - - SearchRequest request = new SearchRequest(indexName); - - SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); - sourceBuilder.query(queryBuilder); - sourceBuilder.from(lastStartOffset); - sourceBuilder.size(maxBucketNum); - sourceBuilder.explain(false); - sourceBuilder.trackTotalHits(false); - - // https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-preference.html - // _shards:2,3 - // specific shard but ideally a local copy of it - if (shardID != -1) { - request.preference("_shards:" + shardID + "|_local"); - } - - if (queryTimeout != -1) { - sourceBuilder.timeout(new TimeValue(queryTimeout, TimeUnit.SECONDS)); - } - - if (StringUtils.isNotBlank(totalSortField)) { - sourceBuilder.sort(new FieldSortBuilder(totalSortField).order(SortOrder.ASC)); - } - - CollapseBuilder collapse = new CollapseBuilder(partitionField); - - // group expansion -> sends sub queries for each bucket - if (maxURLsPerBucket > 1) { - InnerHitBuilder ihb = new InnerHitBuilder(); - ihb.setSize(maxURLsPerBucket); - ihb.setName("urls_per_bucket"); - List> sorts = new LinkedList<>(); - // sort within a bucket - for (String bsf : bucketSortField) { - FieldSortBuilder bucketsorter = SortBuilders.fieldSort(bsf).order(SortOrder.ASC); - sorts.add(bucketsorter); - } - if (!sorts.isEmpty()) { - ihb.setSorts(sorts); - } - collapse.setInnerHits(ihb); - } - - sourceBuilder.collapse(collapse); - - request.source(sourceBuilder); - - // dump query to log - LOG.debug("{} ES query {}", logIdprefix, request.toString()); - - isInQuery.set(true); - client.searchAsync(request, RequestOptions.DEFAULT, this); - } - - @Override - public void onFailure(Exception e) { - LOG.error("{} Exception with ES query", logIdprefix, e); - markQueryReceivedNow(); - } - - @Override - public void onResponse(SearchResponse response) { - long timeTaken = System.currentTimeMillis() - getTimeLastQuerySent(); - - SearchHit[] hits = response.getHits().getHits(); - int numBuckets = hits.length; - - int alreadyprocessed = 0; - int numDocs = 0; - - for (SearchHit hit : hits) { - Map innerHits = hit.getInnerHits(); - // wanted just one per bucket : no inner hits - if (innerHits == null) { - numDocs++; - if (!addHitToBuffer(hit)) { - alreadyprocessed++; - } - continue; - } - // more than one per bucket - SearchHits inMyBucket = innerHits.get("urls_per_bucket"); - for (SearchHit subHit : inMyBucket.getHits()) { - numDocs++; - if (!addHitToBuffer(subHit)) { - alreadyprocessed++; - } - } - } - - queryTimes.addMeasurement(timeTaken); - // could be derived from the count of query times above - eventCounter.scope("ES_queries").incrBy(1); - eventCounter.scope("ES_docs").incrBy(numDocs); - eventCounter.scope("already_being_processed").incrBy(alreadyprocessed); - - LOG.info( - "{} ES query returned {} hits from {} buckets in {} msec with {} already being processed.Took {} msec per doc on average.", - logIdprefix, - 
numDocs, - numBuckets, - timeTaken, - alreadyprocessed, - ((float) timeTaken / numDocs)); - - // reset the value for next fetch date if the previous one is too old - if (resetFetchDateAfterNSecs != -1) { - Instant changeNeededOn = - Instant.ofEpochMilli( - lastTimeResetToNOW.toEpochMilli() + (resetFetchDateAfterNSecs * 1000)); - if (Instant.now().isAfter(changeNeededOn)) { - LOG.info( - "queryDate reset based on resetFetchDateAfterNSecs {}", - resetFetchDateAfterNSecs); - queryDate = null; - lastStartOffset = 0; - } - } - - // no more results? - if (numBuckets == 0) { - queryDate = null; - lastStartOffset = 0; - } - // still got some results but paging won't help - else if (numBuckets < maxBucketNum) { - lastStartOffset = 0; - } else { - lastStartOffset += numBuckets; - } - - // remove lock - markQueryReceivedNow(); - } -} diff --git a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/persistence/HybridSpout.java b/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/persistence/HybridSpout.java deleted file mode 100644 index 8e16a5337..000000000 --- a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/persistence/HybridSpout.java +++ /dev/null @@ -1,221 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to you under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.stormcrawler.elasticsearch.persistence; - -import static org.elasticsearch.index.query.QueryBuilders.boolQuery; - -import com.github.benmanes.caffeine.cache.Cache; -import com.github.benmanes.caffeine.cache.Caffeine; -import java.time.Instant; -import java.util.Date; -import java.util.List; -import java.util.Map; -import org.apache.storm.spout.SpoutOutputCollector; -import org.apache.storm.task.TopologyContext; -import org.apache.stormcrawler.persistence.EmptyQueueListener; -import org.apache.stormcrawler.util.ConfUtils; -import org.elasticsearch.action.ActionListener; -import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.client.RequestOptions; -import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.search.SearchHit; -import org.elasticsearch.search.builder.SearchSourceBuilder; -import org.elasticsearch.search.sort.FieldSortBuilder; -import org.elasticsearch.search.sort.SortBuilders; -import org.elasticsearch.search.sort.SortOrder; -import org.joda.time.format.ISODateTimeFormat; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Uses collapsing spouts to get an initial set of URLs and keys to query for and gets emptyQueue - * notifications from the URLBuffer to query ES for a specific key. 
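A sketch of the extra setting this spout adds, with an arbitrary value: per the code below, `es.status.max.urls.per.reload` controls how many URLs are pulled back for a single key when its queue empties, and falls back to `es.status.max.urls.per.bucket` when unset.

```java
import org.apache.storm.Config;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.stormcrawler.elasticsearch.persistence.HybridSpout;

public class HybridSpoutSketch {
    public static void main(String[] args) {
        Config conf = new Config();
        // per-key top-up size when a queue in the URLBuffer runs empty
        conf.put("es.status.max.urls.per.reload", 20);

        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", new HybridSpout(), 1);
    }
}
```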
- * - * @since 1.15 - */ -public class HybridSpout extends AggregationSpout implements EmptyQueueListener { - - private static final Logger LOG = LoggerFactory.getLogger(HybridSpout.class); - - protected static final String RELOADPARAMNAME = "es.status.max.urls.per.reload"; - - private int bufferReloadSize = 10; - - private Cache searchAfterCache; - - private HostResultListener hrl; - - @Override - public void open( - Map stormConf, - TopologyContext context, - SpoutOutputCollector collector) { - super.open(stormConf, context, collector); - bufferReloadSize = ConfUtils.getInt(stormConf, RELOADPARAMNAME, maxURLsPerBucket); - buffer.setEmptyQueueListener(this); - searchAfterCache = Caffeine.newBuilder().build(); - hrl = new HostResultListener(); - } - - @Override - public void emptyQueue(String queueName) { - - LOG.info("{} Emptied buffer queue for {}", logIdprefix, queueName); - - if (!currentBuckets.contains(queueName)) { - // not interested in this one any more - return; - } - - // reloading the aggregs - searching now - // would just overload ES and yield - // mainly duplicates - if (isInQuery.get()) { - LOG.trace("{} isInquery true", logIdprefix, queueName); - return; - } - - LOG.info("{} Querying for more docs for {}", logIdprefix, queueName); - - if (queryDate == null) { - queryDate = new Date(); - lastTimeResetToNOW = Instant.now(); - } - - String formattedQueryDate = ISODateTimeFormat.dateTimeNoMillis().print(queryDate.getTime()); - - BoolQueryBuilder queryBuilder = - boolQuery() - .filter(QueryBuilders.rangeQuery("nextFetchDate").lte(formattedQueryDate)); - - queryBuilder.filter(QueryBuilders.termQuery(partitionField, queueName)); - - SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); - sourceBuilder.query(queryBuilder); - sourceBuilder.from(0); - sourceBuilder.size(bufferReloadSize); - sourceBuilder.explain(false); - sourceBuilder.trackTotalHits(false); - - // sort within a bucket - for (String bsf : bucketSortField) { - FieldSortBuilder sorter = SortBuilders.fieldSort(bsf).order(SortOrder.ASC); - sourceBuilder.sort(sorter); - } - - // do we have a search after for this one? 
- Object[] searchAfterValues = searchAfterCache.getIfPresent(queueName); - if (searchAfterValues != null) { - sourceBuilder.searchAfter(searchAfterValues); - } - - SearchRequest request = new SearchRequest(indexName); - - request.source(sourceBuilder); - - // https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-preference.html - // _shards:2,3 - // specific shard but ideally a local copy of it - if (shardID != -1) { - request.preference("_shards:" + shardID + "|_local"); - } - - // dump query to log - LOG.debug("{} ES query {} - {}", logIdprefix, queueName, request.toString()); - - client.searchAsync(request, RequestOptions.DEFAULT, hrl); - } - - @Override - /** Overrides the handling of responses for aggregations */ - public void onResponse(SearchResponse response) { - // delete all entries from the searchAfterCache when - // we get the results from the aggregation spouts - searchAfterCache.invalidateAll(); - super.onResponse(response); - } - - @Override - /** The aggregation kindly told us where to start from * */ - protected void sortValuesForKey(String key, Object[] sortValues) { - if (sortValues != null && sortValues.length > 0) this.searchAfterCache.put(key, sortValues); - } - - /** Handling of results for a specific queue * */ - class HostResultListener implements ActionListener { - - @Override - public void onResponse(SearchResponse response) { - - int alreadyprocessed = 0; - int numDocs = 0; - - SearchHit[] hits = response.getHits().getHits(); - - Object[] sortValues = null; - - // retrieve the key for these results - String key = null; - - for (SearchHit hit : hits) { - numDocs++; - String pfield = partitionField; - Map sourceAsMap = hit.getSourceAsMap(); - if (pfield.startsWith("metadata.")) { - sourceAsMap = (Map) sourceAsMap.get("metadata"); - pfield = pfield.substring(9); - } - Object key_as_object = sourceAsMap.get(pfield); - if (key_as_object instanceof List) { - if (((List) (key_as_object)).size() == 1) - key = (String) ((List) key_as_object).get(0); - } else { - key = key_as_object.toString(); - } - - sortValues = hit.getSortValues(); - if (!addHitToBuffer(hit)) { - alreadyprocessed++; - } - } - - // no key if no results have been found - if (key != null) { - searchAfterCache.put(key, sortValues); - } - - eventCounter.scope("ES_queries_host").incrBy(1); - eventCounter.scope("ES_docs_host").incrBy(numDocs); - eventCounter.scope("already_being_processed_host").incrBy(alreadyprocessed); - - LOG.info( - "{} ES term query returned {} hits in {} msec with {} already being processed for {}", - logIdprefix, - numDocs, - response.getTook().getMillis(), - alreadyprocessed, - key); - } - - @Override - public void onFailure(Exception e) { - LOG.error("Exception with ES query", e); - } - } -} diff --git a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/persistence/ScrollSpout.java b/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/persistence/ScrollSpout.java deleted file mode 100644 index 1f44a01f7..000000000 --- a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/persistence/ScrollSpout.java +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to you under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.stormcrawler.elasticsearch.persistence; - -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Queue; -import org.apache.storm.topology.OutputFieldsDeclarer; -import org.apache.storm.tuple.Fields; -import org.apache.storm.tuple.Values; -import org.apache.storm.utils.Utils; -import org.apache.stormcrawler.Constants; -import org.apache.stormcrawler.Metadata; -import org.apache.stormcrawler.persistence.AbstractStatusUpdaterBolt; -import org.apache.stormcrawler.persistence.Status; -import org.elasticsearch.action.ActionListener; -import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.action.search.SearchScrollRequest; -import org.elasticsearch.client.RequestOptions; -import org.elasticsearch.core.TimeValue; -import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.search.SearchHit; -import org.elasticsearch.search.SearchHits; -import org.elasticsearch.search.builder.SearchSourceBuilder; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Reads all the documents from a shard and emits them on the status stream. Used for copying an - * index. 
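A sketch of the index-copy topology this spout is intended for; the component ids and the `status2` configuration suffix are assumptions, chosen only to show that the source index read by the spout and the destination written by the bolt would be configured under different `es.<suffix>.*` keys.

```java
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;
import org.apache.stormcrawler.Constants;
import org.apache.stormcrawler.elasticsearch.persistence.ScrollSpout;
import org.apache.stormcrawler.elasticsearch.persistence.StatusUpdaterBolt;

public class CopyStatusIndexSketch {
    public static void main(String[] args) {
        TopologyBuilder builder = new TopologyBuilder();
        // reads every document of the source index and emits it on the status stream
        builder.setSpout("scroll", new ScrollSpout(), 1);
        // writes to the index configured under the "status2" suffix
        builder.setBolt("updater", new StatusUpdaterBolt("status2"))
                .fieldsGrouping("scroll", Constants.StatusStreamName, new Fields("url"));
    }
}
```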
- */ -public class ScrollSpout extends AbstractSpout implements ActionListener { - - private String scrollId = null; - private boolean hasFinished = false; - - private Queue queue = new LinkedList<>(); - - private static final Logger LOG = LoggerFactory.getLogger(ScrollSpout.class); - - @Override - // simplified version of the super method so that we can store the fields in - // the - // map of things being processed - public void nextTuple() { - synchronized (queue) { - if (!queue.isEmpty()) { - List fields = queue.remove(); - String url = fields.get(0).toString(); - _collector.emit(Constants.StatusStreamName, fields, url); - beingProcessed.put(url, fields); - eventCounter.scope("emitted").incrBy(1); - LOG.debug("{} emitted {}", logIdprefix, url); - return; - } - } - - if (isInQuery.get()) { - LOG.trace("{} isInquery true", logIdprefix); - // sleep for a bit but not too much in order to give ack/fail a - // chance - Utils.sleep(10); - return; - } - - // re-populate the buffer - populateBuffer(); - } - - @Override - protected void populateBuffer() { - if (hasFinished) { - Utils.sleep(10); - return; - } - - // initial request - if (scrollId == null) { - SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); - searchSourceBuilder.query(QueryBuilders.matchAllQuery()); - searchSourceBuilder.size(maxURLsPerBucket * maxBucketNum); - SearchRequest searchRequest = new SearchRequest(indexName); - searchRequest.source(searchSourceBuilder); - searchRequest.scroll(TimeValue.timeValueMinutes(5L)); - - // specific shard but ideally a local copy of it - if (shardID != -1) { - searchRequest.preference("_shards:" + shardID + "|_local"); - } - - isInQuery.set(true); - LOG.trace("{} isInquery set to true", logIdprefix); - - client.searchAsync(searchRequest, RequestOptions.DEFAULT, this); - - // dump query to log - LOG.debug("{} ES query {}", logIdprefix, searchRequest.toString()); - return; - } - - SearchScrollRequest scrollRequest = new SearchScrollRequest(scrollId); - scrollRequest.scroll(TimeValue.timeValueMinutes(5L)); - - isInQuery.set(true); - client.scrollAsync(scrollRequest, RequestOptions.DEFAULT, this); - // dump query to log - LOG.debug("{} ES query {}", logIdprefix, scrollRequest.toString()); - } - - @Override - public void onResponse(SearchResponse response) { - SearchHits hits = response.getHits(); - LOG.info( - "{} ES query returned {} hits in {} msec", - logIdprefix, - hits.getHits().length, - response.getTook().getMillis()); - hasFinished = hits.getHits().length == 0; - synchronized (this.queue) { - // Unlike standard spouts, the scroll queries should never return - // the same - // document twice -> no need to look in the buffer or cache - for (SearchHit hit : hits) { - Map keyValues = hit.getSourceAsMap(); - String url = (String) keyValues.get("url"); - String status = (String) keyValues.get("status"); - String nextFetchDate = (String) keyValues.get("nextFetchDate"); - Metadata metadata = fromKeyValues(keyValues); - metadata.setValue( - AbstractStatusUpdaterBolt.AS_IS_NEXTFETCHDATE_METADATA, nextFetchDate); - this.queue.add(new Values(url, metadata, Status.valueOf(status))); - } - } - scrollId = response.getScrollId(); - // remove lock - markQueryReceivedNow(); - } - - @Override - public void onFailure(Exception e) { - LOG.error("{} Exception with ES query", logIdprefix, e); - markQueryReceivedNow(); - } - - @Override - public void fail(Object msgId) { - LOG.info("{} Fail for {}", logIdprefix, msgId); - eventCounter.scope("failed").incrBy(1); - // retrieve the values from 
being processed and send them back to the - // queue - Values v = (Values) beingProcessed.remove(msgId); - queue.add(v); - } - - @Override - public void declareOutputFields(OutputFieldsDeclarer declarer) { - declarer.declareStream(Constants.StatusStreamName, new Fields("url", "metadata", "status")); - } -} diff --git a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/persistence/StatusUpdaterBolt.java b/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/persistence/StatusUpdaterBolt.java deleted file mode 100644 index 3e10348c5..000000000 --- a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/persistence/StatusUpdaterBolt.java +++ /dev/null @@ -1,451 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to you under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.stormcrawler.elasticsearch.persistence; - -import com.github.benmanes.caffeine.cache.Cache; -import com.github.benmanes.caffeine.cache.Caffeine; -import com.github.benmanes.caffeine.cache.RemovalCause; -import com.github.benmanes.caffeine.cache.RemovalListener; -import java.util.*; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.locks.ReentrantLock; -import java.util.stream.Collectors; -import org.apache.commons.lang.StringUtils; -import org.apache.storm.metric.api.MultiCountMetric; -import org.apache.storm.metric.api.MultiReducedMetric; -import org.apache.storm.task.OutputCollector; -import org.apache.storm.task.TopologyContext; -import org.apache.storm.tuple.Tuple; -import org.apache.stormcrawler.Metadata; -import org.apache.stormcrawler.elasticsearch.BulkItemResponseToFailedFlag; -import org.apache.stormcrawler.elasticsearch.ElasticSearchConnection; -import org.apache.stormcrawler.persistence.AbstractStatusUpdaterBolt; -import org.apache.stormcrawler.persistence.Status; -import org.apache.stormcrawler.util.ConfUtils; -import org.apache.stormcrawler.util.PerSecondReducer; -import org.apache.stormcrawler.util.URLPartitioner; -import org.elasticsearch.action.DocWriteRequest; -import org.elasticsearch.action.bulk.BulkItemResponse; -import org.elasticsearch.action.bulk.BulkProcessor; -import org.elasticsearch.action.bulk.BulkRequest; -import org.elasticsearch.action.bulk.BulkResponse; -import org.elasticsearch.action.index.IndexRequest; -import org.elasticsearch.rest.RestStatus; -import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xcontent.XContentFactory; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Simple bolt which stores the status of URLs into ElasticSearch. Takes the tuples coming from the - * 'status' stream. To be used in combination with a Spout to read from the index. 
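A configuration sketch for the routing options handled in `prepare()` below; the values, in particular `metadata.hostname`, are illustrative only.

```java
import org.apache.storm.Config;

public class StatusRoutingConfigSketch {
    public static void main(String[] args) {
        Config conf = new Config();
        conf.put("es.status.index.name", "status");
        // reuse the politeness partitioning (e.g. byHost) as the ES routing key
        conf.put("es.status.routing", true);
        // optionally store that key as an explicit field, here inside the
        // metadata object, so the spouts can aggregate or collapse on it
        conf.put("es.status.routing.fieldname", "metadata.hostname");
    }
}
```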
- */ -public class StatusUpdaterBolt extends AbstractStatusUpdaterBolt - implements RemovalListener>, BulkProcessor.Listener { - - private static final Logger LOG = LoggerFactory.getLogger(StatusUpdaterBolt.class); - - private String ESBoltType = "status"; - - private static final String ESStatusIndexNameParamName = "es.%s.index.name"; - private static final String ESStatusRoutingParamName = "es.%s.routing"; - private static final String ESStatusRoutingFieldParamName = "es.%s.routing.fieldname"; - - private boolean routingFieldNameInMetadata = false; - - private String indexName; - - private URLPartitioner partitioner; - - /** whether to apply the same partitioning logic used for politeness for routing, e.g byHost */ - private boolean doRouting; - - /** Store the key used for routing explicitly as a field in metadata * */ - private String fieldNameForRoutingKey = null; - - private ElasticSearchConnection connection; - - private Cache> waitAck; - - // Be fair due to cache timeout - private final ReentrantLock waitAckLock = new ReentrantLock(true); - - private MultiCountMetric eventCounter; - - private MultiReducedMetric receivedPerSecMetrics; - - public StatusUpdaterBolt() { - super(); - } - - /** - * Loads the configuration using a substring different from the default value 'status' in order - * to distinguish it from the spout configurations - */ - public StatusUpdaterBolt(String boltType) { - super(); - ESBoltType = boltType; - } - - @Override - public void prepare( - Map stormConf, TopologyContext context, OutputCollector collector) { - - super.prepare(stormConf, context, collector); - - indexName = - ConfUtils.getString( - stormConf, - String.format(StatusUpdaterBolt.ESStatusIndexNameParamName, ESBoltType), - "status"); - - doRouting = - ConfUtils.getBoolean( - stormConf, - String.format(StatusUpdaterBolt.ESStatusRoutingParamName, ESBoltType), - false); - - partitioner = new URLPartitioner(); - partitioner.configure(stormConf); - - fieldNameForRoutingKey = - ConfUtils.getString( - stormConf, - String.format(StatusUpdaterBolt.ESStatusRoutingFieldParamName, ESBoltType)); - if (StringUtils.isNotBlank(fieldNameForRoutingKey)) { - if (fieldNameForRoutingKey.startsWith("metadata.")) { - routingFieldNameInMetadata = true; - fieldNameForRoutingKey = fieldNameForRoutingKey.substring("metadata.".length()); - } - // periods are not allowed in ES2 - replace with %2E - fieldNameForRoutingKey = fieldNameForRoutingKey.replaceAll("\\.", "%2E"); - } - - waitAck = - Caffeine.newBuilder() - .expireAfterWrite(60, TimeUnit.SECONDS) - .removalListener(this) - .build(); - - int metrics_time_bucket_secs = 30; - - // create gauge for waitAck - context.registerMetric("waitAck", () -> waitAck.estimatedSize(), metrics_time_bucket_secs); - - // benchmarking - average number of items received back by Elastic per second - this.receivedPerSecMetrics = - context.registerMetric( - "average_persec", - new MultiReducedMetric(new PerSecondReducer()), - metrics_time_bucket_secs); - - this.eventCounter = - context.registerMetric( - "counters", new MultiCountMetric(), metrics_time_bucket_secs); - - try { - connection = ElasticSearchConnection.getConnection(stormConf, ESBoltType, this); - } catch (Exception e1) { - LOG.error("Can't connect to ElasticSearch", e1); - throw new RuntimeException(e1); - } - } - - @Override - public void cleanup() { - if (connection == null) { - return; - } - connection.close(); - connection = null; - } - - @Override - public void store( - String url, Status status, Metadata metadata, Optional 
nextFetch, Tuple tuple) - throws Exception { - - String documentID = getDocumentID(metadata, url); - - boolean isAlreadySentAndDiscovered; - // need to synchronize: otherwise it might get added to the cache - // without having been sent to ES - waitAckLock.lock(); - try { - // check that the same URL is not being sent to ES - final var alreadySent = waitAck.getIfPresent(documentID); - isAlreadySentAndDiscovered = status.equals(Status.DISCOVERED) && alreadySent != null; - } finally { - waitAckLock.unlock(); - } - - if (isAlreadySentAndDiscovered) { - // if this object is discovered - adding another version of it - // won't make any difference - LOG.debug( - "Already being sent to ES {} with status {} and ID {}", - url, - status, - documentID); - // ack straight away! - eventCounter.scope("skipped").incrBy(1); - super.ack(tuple, url); - return; - } - - XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - builder.field("url", url); - builder.field("status", status); - - builder.startObject("metadata"); - for (String mdKey : metadata.keySet()) { - String[] values = metadata.getValues(mdKey); - // periods are not allowed in ES2 - replace with %2E - mdKey = mdKey.replaceAll("\\.", "%2E"); - builder.array(mdKey, values); - } - - String partitionKey = partitioner.getPartition(url, metadata); - if (partitionKey == null) { - partitionKey = "_DEFAULT_"; - } - - // store routing key in metadata? - if (StringUtils.isNotBlank(fieldNameForRoutingKey) && routingFieldNameInMetadata) { - builder.field(fieldNameForRoutingKey, partitionKey); - } - - builder.endObject(); - - // store routing key outside metadata? - if (StringUtils.isNotBlank(fieldNameForRoutingKey) && !routingFieldNameInMetadata) { - builder.field(fieldNameForRoutingKey, partitionKey); - } - - if (nextFetch.isPresent()) { - builder.timeField("nextFetchDate", nextFetch.get()); - } - - builder.endObject(); - - IndexRequest request = new IndexRequest(getIndexName(metadata)); - - // check that we don't overwrite an existing entry - // When create is used, the index operation will fail if a document - // by that id already exists in the index. 
-        final boolean create = status.equals(Status.DISCOVERED);
-        request.source(builder).id(documentID).create(create);
-
-        if (doRouting) {
-            request.routing(partitionKey);
-        }
-
-        waitAckLock.lock();
-        try {
-            final List<Tuple> tt = waitAck.get(documentID, k -> new LinkedList<>());
-            tt.add(tuple);
-            LOG.debug("Added to waitAck {} with ID {} total {}", url, documentID, tt.size());
-        } finally {
-            waitAckLock.unlock();
-        }
-
-        LOG.debug("Sending to ES buffer {} with ID {}", url, documentID);
-
-        connection.addToProcessor(request);
-    }
-
-    @Override
-    public void onRemoval(
-            @Nullable String key, @Nullable List<Tuple> value, @NotNull RemovalCause cause) {
-        if (!cause.wasEvicted()) return;
-        LOG.error("Purged from waitAck {} with {} values", key, value.size());
-        for (Tuple t : value) {
-            eventCounter.scope("purged").incrBy(1);
-            _collector.fail(t);
-        }
-    }
-
-    @Override
-    public void afterBulk(long executionId, BulkRequest request, BulkResponse response) {
-        LOG.debug("afterBulk [{}] with {} responses", executionId, request.numberOfActions());
-        eventCounter.scope("bulks_received").incrBy(1);
-        eventCounter.scope("bulk_msec").incrBy(response.getTook().getMillis());
-        eventCounter.scope("received").incrBy(request.numberOfActions());
-        receivedPerSecMetrics.scope("received").update(request.numberOfActions());
-
-        var idsToBulkItemsWithFailedFlag =
-                Arrays.stream(response.getItems())
-                        .map(
-                                bir -> {
-                                    String id = bir.getId();
-                                    BulkItemResponse.Failure f = bir.getFailure();
-                                    boolean failed = false;
-                                    if (f != null) {
-                                        // already discovered
-                                        if (f.getStatus().equals(RestStatus.CONFLICT)) {
-                                            eventCounter.scope("doc_conflicts").incrBy(1);
-                                            LOG.debug("Doc conflict ID {}", id);
-                                        } else {
-                                            LOG.error("Update ID {}, failure: {}", id, f);
-                                            failed = true;
-                                        }
-                                    }
-                                    return new BulkItemResponseToFailedFlag(bir, failed);
-                                })
-                        .collect(
-                                // https://github.com/DigitalPebble/storm-crawler/issues/832
-                                Collectors.groupingBy(
-                                        idWithFailedFlagTuple -> idWithFailedFlagTuple.id,
-                                        Collectors.toUnmodifiableList()));
-
-        Map<String, List<Tuple>> presentTuples;
-        long estimatedSize;
-        Set<String> debugInfo = null;
-        waitAckLock.lock();
-        try {
-            presentTuples = waitAck.getAllPresent(idsToBulkItemsWithFailedFlag.keySet());
-            if (!presentTuples.isEmpty()) {
-                waitAck.invalidateAll(presentTuples.keySet());
-            }
-            estimatedSize = waitAck.estimatedSize();
-            // Only if we have to.
-            if (LOG.isDebugEnabled() && estimatedSize > 0L) {
-                debugInfo = new HashSet<>(waitAck.asMap().keySet());
-            }
-        } finally {
-            waitAckLock.unlock();
-        }
-
-        int ackCount = 0;
-        int failureCount = 0;
-
-        for (var entry : presentTuples.entrySet()) {
-            final var id = entry.getKey();
-            final var associatedTuple = entry.getValue();
-            final var bulkItemsWithFailedFlag = idsToBulkItemsWithFailedFlag.get(id);
-
-            BulkItemResponseToFailedFlag selected;
-            if (bulkItemsWithFailedFlag.size() == 1) {
-                selected = bulkItemsWithFailedFlag.get(0);
-            } else {
-                // Fallback if there are multiple responses for the same id
-                BulkItemResponseToFailedFlag tmp = null;
-                var ctFailed = 0;
-                for (var buwff : bulkItemsWithFailedFlag) {
-                    if (tmp == null) {
-                        tmp = buwff;
-                    }
-                    if (buwff.failed) ctFailed++;
-                    else tmp = buwff;
-                }
-                if (ctFailed != bulkItemsWithFailedFlag.size()) {
-                    LOG.warn(
-                            "The id {} would result in an ack and a failure.
Using only the ack for processing.",
-                            id);
-                }
-                selected = Objects.requireNonNull(tmp);
-            }
-
-            if (associatedTuple != null) {
-                LOG.debug("Acked {} tuple(s) for ID {}", associatedTuple.size(), id);
-                for (Tuple tuple : associatedTuple) {
-                    if (!selected.failed) {
-                        String url = tuple.getStringByField("url");
-                        ackCount++;
-                        // ack and put in cache
-                        LOG.debug("Acked {} with ID {}", url, id);
-                        eventCounter.scope("acked").incrBy(1);
-                        super.ack(tuple, url);
-                    } else {
-                        failureCount++;
-                        eventCounter.scope("failed").incrBy(1);
-                        _collector.fail(tuple);
-                    }
-                }
-            } else {
-                LOG.warn("Could not find unacked tuple for {}", id);
-            }
-        }
-
-        LOG.info(
-                "Bulk response [{}] : items {}, waitAck {}, acked {}, failed {}",
-                executionId,
-                idsToBulkItemsWithFailedFlag.size(),
-                estimatedSize,
-                ackCount,
-                failureCount);
-        if (debugInfo != null) {
-            for (String kinaw : debugInfo) {
-                LOG.debug("Still in wait ack after bulk response [{}] => {}", executionId, kinaw);
-            }
-        }
-    }
-
-    @Override
-    public void afterBulk(long executionId, BulkRequest request, Throwable throwable) {
-        eventCounter.scope("bulks_received").incrBy(1);
-        eventCounter.scope("received").incrBy(request.numberOfActions());
-        receivedPerSecMetrics.scope("received").update(request.numberOfActions());
-
-        LOG.error("Exception with bulk {} - failing the whole lot ", executionId, throwable);
-
-        final var failedIds =
-                request.requests().stream()
-                        .map(DocWriteRequest::id)
-                        .collect(Collectors.toUnmodifiableSet());
-        waitAckLock.lock();
-        Map<String, List<Tuple>> failedTupleLists;
-        try {
-            failedTupleLists = waitAck.getAllPresent(failedIds);
-            if (!failedTupleLists.isEmpty()) {
-                waitAck.invalidateAll(failedTupleLists.keySet());
-            }
-        } finally {
-            waitAckLock.unlock();
-        }
-
-        for (var id : failedIds) {
-            var failedTuples = failedTupleLists.get(id);
-            if (failedTuples != null) {
-                LOG.debug("Failed {} tuple(s) for ID {}", failedTuples.size(), id);
-                for (Tuple x : failedTuples) {
-                    // fail it
-                    eventCounter.scope("failed").incrBy(1);
-                    _collector.fail(x);
-                }
-            } else {
-                LOG.warn("Could not find unacked tuple for {}", id);
-            }
-        }
-    }
-
-    @Override
-    public void beforeBulk(long executionId, BulkRequest request) {
-        LOG.debug("beforeBulk {} with {} actions", executionId, request.numberOfActions());
-        eventCounter.scope("bulks_sent").incrBy(1);
-    }
-
-    /**
-     * Must be overridden for implementing custom index names based on some metadata information By
-     * Default, indexName coming from config is used
-     */
-    protected String getIndexName(Metadata m) {
-        return indexName;
-    }
-}
diff --git a/external/elasticsearch/src/test/java/org/apache/stormcrawler/elasticsearch/bolt/IndexerBoltTest.java b/external/elasticsearch/src/test/java/org/apache/stormcrawler/elasticsearch/bolt/IndexerBoltTest.java
deleted file mode 100644
index fb6278541..000000000
--- a/external/elasticsearch/src/test/java/org/apache/stormcrawler/elasticsearch/bolt/IndexerBoltTest.java
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to you under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.stormcrawler.elasticsearch.bolt; - -import static org.junit.Assert.assertEquals; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -import java.util.HashMap; -import java.util.Map; -import java.util.concurrent.*; -import org.apache.storm.task.OutputCollector; -import org.apache.storm.tuple.Tuple; -import org.apache.stormcrawler.Constants; -import org.apache.stormcrawler.Metadata; -import org.apache.stormcrawler.TestOutputCollector; -import org.apache.stormcrawler.TestUtil; -import org.apache.stormcrawler.indexing.AbstractIndexerBolt; -import org.junit.*; -import org.junit.rules.Timeout; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.testcontainers.elasticsearch.ElasticsearchContainer; - -public class IndexerBoltTest { - - @Rule public Timeout globalTimeout = Timeout.seconds(120); - - private ElasticsearchContainer container; - private IndexerBolt bolt; - protected TestOutputCollector output; - - private static final Logger LOG = LoggerFactory.getLogger(IndexerBoltTest.class); - private static ExecutorService executorService; - - @BeforeClass - public static void beforeClass() { - executorService = Executors.newFixedThreadPool(2); - } - - @AfterClass - public static void afterClass() { - executorService.shutdown(); - executorService = null; - } - - @Before - public void setupIndexerBolt() { - - String version = System.getProperty("elasticsearch-version"); - if (version == null) version = "7.17.7"; - LOG.info("Starting docker instance of Elasticsearch {}...", version); - - container = - new ElasticsearchContainer( - "docker.elastic.co/elasticsearch/elasticsearch:" + version); - container.withPassword("s3cret"); - container.start(); - - bolt = new IndexerBolt("content"); - - // give the indexer the port for connecting to ES - - Map conf = new HashMap<>(); - conf.put(AbstractIndexerBolt.urlFieldParamName, "url"); - conf.put(AbstractIndexerBolt.canonicalMetadataParamName, "canonical"); - conf.put("es.indexer.addresses", container.getHttpHostAddress()); - conf.put("es.indexer.compatibility.mode", false); - conf.put("es.indexer.user", "elastic"); - conf.put("es.indexer.password", "s3cret"); - - output = new TestOutputCollector(); - - bolt.prepare(conf, TestUtil.getMockedTopologyContext(), new OutputCollector(output)); - } - - @After - public void close() { - LOG.info("Closing indexer bolt and ES container"); - bolt.cleanup(); - container.close(); - output = null; - } - - private void index(String url, String text, Metadata metadata) { - Tuple tuple = mock(Tuple.class); - when(tuple.getStringByField("text")).thenReturn(text); - when(tuple.getStringByField("url")).thenReturn(url); - when(tuple.getValueByField("metadata")).thenReturn(metadata); - bolt.execute(tuple); - } - - private int lastIndex(String url, String text, Metadata metadata, long timeoutInMs) - throws ExecutionException, InterruptedException, TimeoutException { - var oldSize = output.getEmitted(Constants.StatusStreamName).size(); - index(url, text, metadata); - return executorService - .submit( - () -> { - // check that something has been 
emitted out - var outputSize = output.getEmitted(Constants.StatusStreamName).size(); - while (outputSize == oldSize) { - Thread.sleep(100); - outputSize = output.getEmitted(Constants.StatusStreamName).size(); - } - return outputSize; - }) - .get(timeoutInMs, TimeUnit.MILLISECONDS); - } - - @Test - // https://github.com/DigitalPebble/storm-crawler/issues/832 - public void simultaneousCanonicals() - throws ExecutionException, InterruptedException, TimeoutException { - Metadata m1 = new Metadata(); - String url = - "https://www.obozrevatel.com/ukr/dnipro/city/u-dnipri-ta-oblasti-ogolosili-shtormove-poperedzhennya.htm"; - m1.addValue("canonical", url); - - Metadata m2 = new Metadata(); - String url2 = - "https://www.obozrevatel.com/ukr/dnipro/city/u-dnipri-ta-oblasti-ogolosili-shtormove-poperedzhennya/amp.htm"; - m2.addValue("canonical", url); - - index(url, "", m1); - lastIndex(url2, "", m2, 10_000); - - // should be two in status output - assertEquals(2, output.getEmitted(Constants.StatusStreamName).size()); - - // and 2 acked - assertEquals(2, output.getAckedTuples().size()); - - // TODO check output in ES? - - } -} diff --git a/external/elasticsearch/src/test/java/org/apache/stormcrawler/elasticsearch/bolt/StatusBoltTest.java b/external/elasticsearch/src/test/java/org/apache/stormcrawler/elasticsearch/bolt/StatusBoltTest.java deleted file mode 100644 index a89e79710..000000000 --- a/external/elasticsearch/src/test/java/org/apache/stormcrawler/elasticsearch/bolt/StatusBoltTest.java +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to you under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.stormcrawler.elasticsearch.bolt; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; -import java.nio.charset.Charset; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.HashMap; -import java.util.Map; -import java.util.Objects; -import java.util.concurrent.*; -import org.apache.http.HttpHost; -import org.apache.http.auth.AuthScope; -import org.apache.http.auth.UsernamePasswordCredentials; -import org.apache.http.client.CredentialsProvider; -import org.apache.http.impl.client.BasicCredentialsProvider; -import org.apache.storm.task.OutputCollector; -import org.apache.storm.tuple.Tuple; -import org.apache.stormcrawler.Metadata; -import org.apache.stormcrawler.TestOutputCollector; -import org.apache.stormcrawler.TestUtil; -import org.apache.stormcrawler.elasticsearch.persistence.StatusUpdaterBolt; -import org.apache.stormcrawler.persistence.Status; -import org.elasticsearch.action.get.GetRequest; -import org.elasticsearch.action.get.GetResponse; -import org.elasticsearch.client.RequestOptions; -import org.elasticsearch.client.RestClient; -import org.elasticsearch.client.RestClientBuilder; -import org.elasticsearch.client.RestHighLevelClient; -import org.elasticsearch.client.indices.CreateIndexRequest; -import org.elasticsearch.xcontent.XContentType; -import org.junit.*; -import org.junit.rules.Timeout; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.testcontainers.elasticsearch.ElasticsearchContainer; - -public class StatusBoltTest { - - private ElasticsearchContainer container; - private StatusUpdaterBolt bolt; - protected TestOutputCollector output; - - protected RestHighLevelClient client; - - private static final Logger LOG = LoggerFactory.getLogger(StatusBoltTest.class); - private static ExecutorService executorService; - - @Rule public Timeout globalTimeout = Timeout.seconds(120); - - @BeforeClass - public static void beforeClass() { - executorService = Executors.newFixedThreadPool(2); - } - - @AfterClass - public static void afterClass() { - executorService.shutdown(); - executorService = null; - } - - @Before - public void setupStatusBolt() throws IOException { - - String version = System.getProperty("elasticsearch-version"); - if (version == null) version = "7.17.7"; - LOG.info("Starting docker instance of Elasticsearch {}...", version); - - container = - new ElasticsearchContainer( - "docker.elastic.co/elasticsearch/elasticsearch:" + version) - .withPassword("s3cret"); - - container.start(); - - bolt = new StatusUpdaterBolt(); - - // configure the status index - - RestClientBuilder builder = - RestClient.builder( - new HttpHost(container.getHost(), container.getMappedPort(9200))); - - final CredentialsProvider credentialsProvider = new BasicCredentialsProvider(); - credentialsProvider.setCredentials( - AuthScope.ANY, new UsernamePasswordCredentials("elastic", "s3cret")); - - builder.setHttpClientConfigCallback( - clientBuilder -> { - clientBuilder.setDefaultCredentialsProvider(credentialsProvider); - return clientBuilder; - }); - - client = new RestHighLevelClient(builder); - - // TODO - // https://www.elastic.co/guide/en/elasticsearch/client/java-rest/7.13/java-rest-high-put-mapping.html - - CreateIndexRequest request = new CreateIndexRequest("status"); - - URI uriToFile; - try { - uriToFile = - 
Objects.requireNonNull( - getClass().getClassLoader().getResource("status.mapping")) - .toURI(); - } catch (URISyntaxException e) { - throw new RuntimeException(e); - } - - String mappingSource = Files.readString(Path.of(uriToFile), Charset.defaultCharset()); - - request.source(mappingSource, XContentType.JSON); - - client.indices().create(request, RequestOptions.DEFAULT); - - // configure the status updater bolt - - Map conf = new HashMap<>(); - conf.put("es.status.routing.fieldname", "metadata.key"); - - conf.put("es.status.addresses", container.getHttpHostAddress()); - - conf.put("scheduler.class", "org.apache.stormcrawler.persistence.DefaultScheduler"); - - conf.put("status.updater.cache.spec", "maximumSize=10000,expireAfterAccess=1h"); - - conf.put("metadata.persist", "someKey"); - - conf.put("es.status.compatibility.mode", false); - - conf.put("es.status.user", "elastic"); - conf.put("es.status.password", "s3cret"); - - output = new TestOutputCollector(); - - bolt.prepare(conf, TestUtil.getMockedTopologyContext(), new OutputCollector(output)); - } - - @After - public void close() { - LOG.info("Closing updater bolt and ES container"); - bolt.cleanup(); - container.close(); - output = null; - try { - client.close(); - } catch (IOException e) { - } - } - - private Future store(String url, Status status, Metadata metadata) { - Tuple tuple = mock(Tuple.class); - when(tuple.getValueByField("status")).thenReturn(status); - when(tuple.getStringByField("url")).thenReturn(url); - when(tuple.getValueByField("metadata")).thenReturn(metadata); - bolt.execute(tuple); - - return executorService.submit( - () -> { - var outputSize = output.getAckedTuples().size(); - while (outputSize == 0) { - Thread.sleep(100); - outputSize = output.getAckedTuples().size(); - } - return outputSize; - }); - } - - @Test - // see https://github.com/DigitalPebble/storm-crawler/issues/885 - public void checkListKeyFromES() - throws IOException, ExecutionException, InterruptedException, TimeoutException { - - String url = "https://www.url.net/something"; - - Metadata md = new Metadata(); - - md.addValue("someKey", "someValue"); - - store(url, Status.DISCOVERED, md).get(10, TimeUnit.SECONDS); - - assertEquals(1, output.getAckedTuples().size()); - - // check output in ES? 
- - String id = org.apache.commons.codec.digest.DigestUtils.sha256Hex(url); - - GetResponse result = client.get(new GetRequest("status", id), RequestOptions.DEFAULT); - - Map sourceAsMap = result.getSourceAsMap(); - - final String pfield = "metadata.someKey"; - sourceAsMap = (Map) sourceAsMap.get("metadata"); - - final var pfieldNew = pfield.substring(9); - Object key = sourceAsMap.get(pfieldNew); - - assertTrue(key instanceof java.util.ArrayList); - } -} diff --git a/external/elasticsearch/src/test/resources/status.mapping b/external/elasticsearch/src/test/resources/status.mapping deleted file mode 100644 index e5b14fe97..000000000 --- a/external/elasticsearch/src/test/resources/status.mapping +++ /dev/null @@ -1,39 +0,0 @@ -{ - "settings": { - "index": { - "number_of_shards": 10, - "number_of_replicas": 1, - "refresh_interval": "5s" - } - }, - "mappings": { - "dynamic_templates": [{ - "metadata": { - "path_match": "metadata.*", - "match_mapping_type": "string", - "mapping": { - "type": "keyword" - } - } - }], - "_source": { - "enabled": true - }, - "properties": { - "key": { - "type": "keyword", - "index": true - }, - "nextFetchDate": { - "type": "date", - "format": "date_optional_time" - }, - "status": { - "type": "keyword" - }, - "url": { - "type": "keyword" - } - } - } -} diff --git a/pom.xml b/pom.xml index 28d93538e..7760a709c 100644 --- a/pom.xml +++ b/pom.xml @@ -479,7 +479,6 @@ under the License. core external external/aws - external/elasticsearch external/langid external/opensearch external/solr @@ -488,7 +487,6 @@ under the License. external/urlfrontier external/warc archetype - external/elasticsearch/archetype external/opensearch/archetype