Skip to content

Commit

Permalink
Selenium test (#1093)
Browse files Browse the repository at this point in the history
* WIP tests for Selenium protocol implementation

Signed-off-by: Julien Nioche <julien@digitalpebble.com>

* revert unrelated change + fix test + removed default config for selenium

Signed-off-by: Julien Nioche <julien@digitalpebble.com>

* use -f as the configuration-file option when testing protocols (-c conflicts with Storm's own option); revert to the correct timeout values, see #882

Signed-off-by: Julien Nioche <julien@digitalpebble.com>

* bumped Selenium version to 4.12.1

Signed-off-by: Julien Nioche <julien@digitalpebble.com>

---------

Signed-off-by: Julien Nioche <julien@digitalpebble.com>
  • Loading branch information
jnioche committed Sep 18, 2023
1 parent f7dfa82 commit 156f817
Show file tree
Hide file tree
Showing 5 changed files with 142 additions and 13 deletions.
24 changes: 20 additions & 4 deletions core/pom.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
<?xml version="1.0"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<parent>
Expand All @@ -26,7 +28,7 @@
<commons.lang.version>2.6</commons.lang.version>
<wiremock.version>2.27.2</wiremock.version>
<rometools.version>2.1.0</rometools.version>
<selenium.version>4.9.1</selenium.version>
<selenium.version>4.12.1</selenium.version>
<cli.version>1.5.0</cli.version>
<okhttp.version>4.11.0</okhttp.version>
<caffeine.version>3.1.6</caffeine.version>
Expand Down Expand Up @@ -125,9 +127,9 @@
</dependency>

<dependency>
<groupId>com.github.tomakehurst</groupId>
<groupId>org.wiremock</groupId>
<artifactId>wiremock</artifactId>
<version>${wiremock.version}</version>
<version>3.0.4</version>
<scope>test</scope>
</dependency>

Expand Down Expand Up @@ -239,6 +241,20 @@
<version>${okhttp.version}</version>
</dependency>

<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>selenium</artifactId>
<version>1.19.0</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.seleniumhq.selenium</groupId>
<artifactId>selenium-chrome-driver</artifactId>
<version>${selenium.version}</version>
<scope>test</scope>
</dependency>

</dependencies>
<dependencyManagement>
<dependencies>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -204,13 +204,13 @@ protected static void main(AbstractHttpProtocol protocol, String[] args) throws
conf.putAll(ConfUtils.extractConfigElement(defaultSCConfig));

Options options = new Options();
options.addOption("c", true, "configuration file");
options.addOption("f", true, "configuration file");

CommandLineParser parser = new DefaultParser();
CommandLine cmd = parser.parse(options, args);

if (cmd.hasOption("c")) {
String confFile = cmd.getOptionValue("c");
String confFile = cmd.getOptionValue("f");
if (confFile != null) {
ConfUtils.loadConf(confFile, conf);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ public void configure(Config conf) {

// see https://github.com/SeleniumHQ/selenium/wiki/DesiredCapabilities
DesiredCapabilities capabilities = new DesiredCapabilities();
capabilities.setJavascriptEnabled(true);

String userAgentString = getAgentString(conf);

Expand All @@ -58,6 +57,8 @@ public void configure(Config conf) {
}
}

LOG.info("Configuring Selenium with {}", capabilities);

// load addresses from config
List<String> addresses = ConfUtils.loadListFromConf("selenium.addresses", conf);
if (addresses.size() == 0) {
Expand Down
9 changes: 4 additions & 5 deletions core/src/main/resources/crawler-default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -169,12 +169,11 @@ config:
# navigationfilters.config.file: "navigationfilters.json"
# selenium.addresses: "http://localhost:9515"
selenium.implicitlyWait: 0
selenium.pageLoadTimeout: -1
selenium.pageLoadTimeout: 0
selenium.scriptTimeout: 0
selenium.capabilities:
takesScreenshot: false
loadImages: false
javascriptEnabled: true
# selenium.capabilities:
# a browser name is required
# browserName: "chrome"
# illustrates the use of the variable for user agent
# phantomjs.page.settings.userAgent: "$userAgent"
# ChromeDriver config
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
/**
* Licensed to DigitalPebble Ltd under one or more contributor license agreements. See the NOTICE
* file distributed with this work for additional information regarding copyright ownership.
* DigitalPebble licenses this file to You under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. You may obtain a copy of the
* License at
*
* <p>http://www.apache.org/licenses/LICENSE-2.0
*
* <p>Unless required by applicable law or agreed to in writing, software distributed under the
* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.digitalpebble.stormcrawler.protocol.selenium;

import com.digitalpebble.stormcrawler.Metadata;
import com.digitalpebble.stormcrawler.protocol.Protocol;
import com.digitalpebble.stormcrawler.protocol.ProtocolResponse;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.storm.Config;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.Timeout;
import org.openqa.selenium.chrome.ChromeOptions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testcontainers.containers.BrowserWebDriverContainer;
import org.testcontainers.containers.BrowserWebDriverContainer.VncRecordingMode;
import org.testcontainers.utility.DockerImageName;

/**
 * Tests the Selenium protocol implementation ({@link RemoteDriverProtocol}) against a standalone
 * Chrome instance started with Testcontainers, and not through Selenium Grid.
 *
 * @see <a href="https://java.testcontainers.org/modules/webdriver_containers/#example">
 *     Testcontainers WebDriver containers</a>
 */
public class ProtocolTest {

    /** Fails any test that runs for longer than 120 seconds. */
    @Rule public Timeout globalTimeout = Timeout.seconds(120);

    private static final Logger LOG = LoggerFactory.getLogger(ProtocolTest.class);

    /** Docker image providing the standalone Chrome browser used by the tests. */
    private static final DockerImageName IMAGE =
            DockerImageName.parse("selenium/standalone-chrome:116.0");

    /** Browser container managed by JUnit as a rule; VNC recording is skipped. */
    @Rule
    public BrowserWebDriverContainer<?> chrome =
            new BrowserWebDriverContainer<>(IMAGE)
                    .withCapabilities(new ChromeOptions())
                    .withRecordingMode(VncRecordingMode.SKIP, null);

    /** Protocol under test; created in {@link #setupProtocol()}. */
    private Protocol protocol;

    /**
     * Builds a {@link RemoteDriverProtocol} configured to talk to the Selenium endpoint exposed by
     * the {@link #chrome} container.
     */
    @Before
    public void setupProtocol() {
        final String seleniumAddress = chrome.getSeleniumAddress().toExternalForm();
        LOG.info("Configuring protocol instance to connect to {}", seleniumAddress);

        // Deliberately empty for now. Flags that can be added here while debugging include
        // --no-sandbox, --disable-dev-shm-usage, --headless, --disable-gpu and
        // --remote-allow-origins=*
        List<String> chromeArgs = new ArrayList<>();

        Map<String, Object> chromeOptions = new HashMap<>();
        chromeOptions.put("args", chromeArgs);

        Map<String, Object> capabilities = new HashMap<>();
        capabilities.put("browserName", "chrome");
        capabilities.put("goog:chromeOptions", chromeOptions);

        Config conf = new Config();
        conf.put("http.agent.name", "this.is.only.a.test");
        conf.put("selenium.addresses", seleniumAddress);
        // NOTE(review): crawler-default.yaml spells this key "selenium.scriptTimeout" - confirm
        // which of the two spellings RemoteDriverProtocol actually reads.
        conf.put("selenium.setScriptTimeout", 10000);
        conf.put("selenium.pageLoadTimeout", 10000);
        conf.put("selenium.implicitlyWait", 10000);
        conf.put("selenium.capabilities", capabilities);

        protocol = new RemoteDriverProtocol();
        protocol.configure(conf);
    }

    /**
     * Fetches a page through the remote browser and checks the status code of the response.
     *
     * @throws Exception if the protocol fails to fetch the page; the exception is propagated so
     *     that the test report shows the real cause instead of a bare boolean assertion failure
     */
    @Test
    // not working yet
    public void test() throws Exception {
        // find better examples later
        ProtocolResponse response =
                protocol.getProtocolOutput("https://stormcrawler.net", new Metadata());
        Assert.assertEquals(307, response.getStatusCode());
    }

    /** Releases the resources held by the protocol, if one was created. */
    @After
    public void close() {
        // setupProtocol() may have failed before assigning the field; avoid masking that failure
        // with a NullPointerException raised from the teardown.
        if (protocol != null) {
            protocol.cleanup();
        }
    }
}

0 comments on commit 156f817

Please sign in to comment.