From 7d753cce690823eddf7a37577b364fff78edd91d Mon Sep 17 00:00:00 2001 From: Malik Bouaoud Date: Thu, 18 Mar 2021 21:50:25 +0100 Subject: [PATCH 1/3] re interrupting reinterrupting waitNewUrl and sleep after the InterruptedException is caught --- webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java index 886e74a92..9567236b0 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java @@ -458,6 +458,8 @@ protected void sleep(int time) { Thread.sleep(time); } catch (InterruptedException e) { logger.error("Thread interrupted when sleep",e); + //restore interrupted thread + Thread.currentThread().interrupt(); } } @@ -564,6 +566,7 @@ private void waitNewUrl() { newUrlCondition.await(emptySleepTime, TimeUnit.MILLISECONDS); } catch (InterruptedException e) { logger.warn("waitNewUrl - interrupted, error {}", e); + Thread.currentThread().interrupt(); } finally { newUrlLock.unlock(); } From 8d3c73ac675d159480cd66799089532c07fb41f0 Mon Sep 17 00:00:00 2001 From: Malik Bouaoud Date: Thu, 18 Mar 2021 22:09:30 +0100 Subject: [PATCH 2/3] adding a NullPointerException to the throws clause, as it could be thrown by the IPUtils method --- .../src/main/java/us/codecraft/webmagic/utils/IPUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/IPUtils.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/IPUtils.java index 3d416964b..dafb7ce70 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/IPUtils.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/IPUtils.java @@ -12,7 +12,7 @@ */ public abstract class IPUtils { - public static String getFirstNoLoopbackIPAddresses() throws SocketException { + public 
static String getFirstNoLoopbackIPAddresses() throws SocketException, NullPointerException{ Enumeration networkInterfaces = NetworkInterface.getNetworkInterfaces(); From 13f9840b573b8c7305bf30d8f010fb3bc5817ac4 Mon Sep 17 00:00:00 2001 From: Malik Bouaoud Date: Tue, 23 Mar 2021 11:05:12 +0100 Subject: [PATCH 3/3] refactoring on constructors and setters --- .../us/codecraft/webmagic/utils/FilePersistentBase.java | 8 ++++++++ .../webmagic/pipeline/FilePageModelPipeline.java | 4 ++-- .../webmagic/pipeline/JsonFilePageModelPipeline.java | 4 ++-- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/utils/FilePersistentBase.java b/webmagic-core/src/main/java/us/codecraft/webmagic/utils/FilePersistentBase.java index 79b9efece..721401f8b 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/utils/FilePersistentBase.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/utils/FilePersistentBase.java @@ -11,6 +11,14 @@ public class FilePersistentBase { protected String path; + + public FilePersistentBase() { + setPath("/data/webmagic/"); + } + + public FilePersistentBase(String path) { + setPath(path); + } public static String PATH_SEPERATOR = "/"; diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/FilePageModelPipeline.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/FilePageModelPipeline.java index 0db9b819d..22d818217 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/FilePageModelPipeline.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/FilePageModelPipeline.java @@ -28,11 +28,11 @@ public class FilePageModelPipeline extends FilePersistentBase implements PageMod * new JsonFilePageModelPipeline with default path "/data/webmagic/" */ public FilePageModelPipeline() { - setPath("/data/webmagic/"); + super(); } public FilePageModelPipeline(String path) { - setPath(path); + super(path); } @Override diff 
--git a/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/JsonFilePageModelPipeline.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/JsonFilePageModelPipeline.java index 7a7f80a25..7b3b3035c 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/JsonFilePageModelPipeline.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/JsonFilePageModelPipeline.java @@ -29,11 +29,11 @@ public class JsonFilePageModelPipeline extends FilePersistentBase implements Pag * new JsonFilePageModelPipeline with default path "/data/webmagic/" */ public JsonFilePageModelPipeline() { - setPath("/data/webmagic/"); + super(); } public JsonFilePageModelPipeline(String path) { - setPath(path); + super(path); } @Override