From b6d66653f79e5ecde5c9dd3ed220973723fa3ed4 Mon Sep 17 00:00:00 2001 From: LSmyrnaios Date: Wed, 1 Sep 2021 19:42:32 +0300 Subject: [PATCH] - Integrate the latest changes from the "PublicationsRetriever"-plugin. - Update dependencies. --- build.gradle | 6 +++--- gradle/wrapper/gradle-wrapper.properties | 2 +- installAndRun.sh | 2 +- .../plugins/PublicationsRetrieverPlugin.java | 19 +++++++------------ 4 files changed, 12 insertions(+), 17 deletions(-) diff --git a/build.gradle b/build.gradle index 79739fc..fdc4015 100644 --- a/build.gradle +++ b/build.gradle @@ -1,12 +1,12 @@ buildscript { ext { - springBootVersion = "2.5.3" - springSecurityVersion = "5.5.1" + springBootVersion = "2.5.4" + springSecurityVersion = "5.5.2" } } plugins { - id 'org.springframework.boot' version '2.5.3' + id 'org.springframework.boot' version '2.5.4' id 'io.spring.dependency-management' version '1.0.11.RELEASE' id 'java' } diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index 05679dc..ffed3a2 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,5 +1,5 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-7.1.1-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-7.2-bin.zip zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/installAndRun.sh b/installAndRun.sh index 0e51202..20c138e 100755 --- a/installAndRun.sh +++ b/installAndRun.sh @@ -20,7 +20,7 @@ cd ../ && rm -rf PublicationsRetriever cd ../ echo -e "\nAsking for sudo, in order to verify the installation of 'gradle'..\n" -gradleVersion="7.1.1" +gradleVersion="7.2" wget https://services.gradle.org/distributions/gradle-${gradleVersion}-bin.zip diff --git a/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java 
b/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java index 533d02f..f44b6e8 100644 --- a/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java +++ b/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java @@ -115,17 +115,9 @@ public class PublicationsRetrieverPlugin { try { // Check if it's a docUrl, if not, it gets crawled. HttpConnUtils.connectAndCheckMimeType(id, sourceUrl, urlToCheck, urlToCheck, null, true, isPossibleDocOrDatasetUrl); } catch (Exception e) { - String wasUrlValid = "true"; - String couldRetry = "false"; - if ( e instanceof RuntimeException ) { - String message = e.getMessage(); - if ( message != null) { - if ( message.contains("HTTP 404 Client Error") ) - wasUrlValid = "false"; - else if ( message.contains("Server Error") || message.contains("HTTP 408") ) - couldRetry = "true"; // We could retry at a later time, as the HTTP-non-404-errors can be temporal. - } - } + List<String> list = LoaderAndChecker.getWasValidAndCouldRetry(e); + String wasUrlValid = list.get(0); + String couldRetry = list.get(1); UrlUtils.logOutputData(id, urlToCheck, null, "unreachable", "Discarded at loading time, due to connectivity problems.", null, true, "true", wasUrlValid, "false", "false", couldRetry); } return true; @@ -219,7 +211,10 @@ public class PublicationsRetrieverPlugin { try { return HttpConnUtils.connectAndCheckMimeType("null", urlToCheck, urlToCheck, urlToCheck, null, true, false); // Sent the < null > in quotes to avoid an NPE in the concurrent data-structures. 
} catch (Exception e) { - UrlUtils.logOutputData(null, urlToCheck, null, "unreachable", "Discarded at loading time, due to connectivity problems.", null, true, "true", "true", "false", "false", "false"); + List<String> list = LoaderAndChecker.getWasValidAndCouldRetry(e); + String wasUrlValid = list.get(0); + String couldRetry = list.get(1); + UrlUtils.logOutputData(null, urlToCheck, null, "unreachable", "Discarded at loading time, due to connectivity problems.", null, true, "true", wasUrlValid, "false", "false", couldRetry); return false; } }