From aefa36c54bc2b56cd906170cc0d25f8dbf4f6e48 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Tue, 14 Sep 2021 17:26:15 +0200 Subject: [PATCH] other task executions go ahead if UnknownHostException happens on a single task --- .../orcid/SparkDownloadOrcidAuthors.java | 18 +++++++++++++++++- .../orcid/SparkDownloadOrcidWorks.java | 16 +++++++++++++++- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java index 2b8e42bf6..8f0b3a094 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java @@ -4,8 +4,11 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.FileNotFoundException; +import java.net.InetAddress; +import java.net.UnknownHostException; import java.text.SimpleDateFormat; import java.util.Date; +import java.util.List; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -18,6 +21,7 @@ import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; import org.apache.spark.util.LongAccumulator; @@ -78,6 +82,7 @@ public class SparkDownloadOrcidAuthors { LongAccumulator errorHTTP503Acc = spark.sparkContext().longAccumulator("error_HTTP_503"); LongAccumulator errorHTTP525Acc = spark.sparkContext().longAccumulator("error_HTTP_525"); LongAccumulator errorHTTPGenericAcc = spark.sparkContext().longAccumulator("error_HTTP_Generic"); + LongAccumulator unknowHostAcc = spark.sparkContext().longAccumulator("error_unknowHost"); logger.info("Retrieving data from lamda sequence file"); JavaPairRDD lamdaFileRDD = sc @@ -107,7 +112,17 @@ public class SparkDownloadOrcidAuthors { httpGet.addHeader("Accept", "application/vnd.orcid+xml"); httpGet.addHeader("Authorization", String.format("Bearer %s", token)); long startReq = System.currentTimeMillis(); - CloseableHttpResponse response = client.execute(httpGet); + CloseableHttpResponse response = null; + try { + response = client.execute(httpGet); + } catch (UnknownHostException u) { + downloaded.setStatusCode(-1); + unknowHostAcc.add(1); + if (client != null) { + client.close(); + } + return downloaded.toTuple2(); + } long endReq = System.currentTimeMillis(); long reqTime = endReq - startReq; if (reqTime < 1000) { @@ -171,6 +186,7 @@ public class SparkDownloadOrcidAuthors { logger.info("errorHTTP503Acc: {}", errorHTTP503Acc.value()); logger.info("errorHTTP525Acc: {}", errorHTTP525Acc.value()); logger.info("errorHTTPGenericAcc: {}", errorHTTPGenericAcc.value()); + logger.info("unknowHostAcc: {}", unknowHostAcc.value()); }); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java index cab538783..457c79adb 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java @@ -3,6 +3,8 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.net.InetAddress; +import java.net.UnknownHostException; import java.time.LocalDate; import java.time.format.DateTimeFormatter; import java.util.*; @@ -96,6 +98,7 @@ public class SparkDownloadOrcidWorks { LongAccumulator errorHTTP503Acc = spark.sparkContext().longAccumulator("error_HTTP_503"); LongAccumulator errorHTTP525Acc = spark.sparkContext().longAccumulator("error_HTTP_525"); LongAccumulator errorHTTPGenericAcc = spark.sparkContext().longAccumulator("error_HTTP_Generic"); + LongAccumulator unknowHostAcc = spark.sparkContext().longAccumulator("error_unknowHost"); JavaPairRDD updatedAuthorsRDD = sc .sequenceFile(workingPath + "downloads/updated_authors/*", Text.class, Text.class); @@ -154,7 +157,17 @@ public class SparkDownloadOrcidWorks { httpGet.addHeader("Accept", "application/vnd.orcid+xml"); httpGet.addHeader("Authorization", String.format("Bearer %s", token)); long startReq = System.currentTimeMillis(); - CloseableHttpResponse response = client.execute(httpGet); + CloseableHttpResponse response = null; + try { + response = client.execute(httpGet); + } catch (UnknownHostException u) { + downloaded.setStatusCode(-1); + unknowHostAcc.add(1); + if (client != null) { + client.close(); + } + return downloaded.toTuple2(); + } long endReq = System.currentTimeMillis(); long reqTime = endReq - startReq; if (reqTime < 1000) { @@ -219,6 +232,7 @@ public class SparkDownloadOrcidWorks { logger.info("errorHTTP503Acc: {}", errorHTTP503Acc.value()); logger.info("errorHTTP525Acc: {}", errorHTTP525Acc.value()); logger.info("errorHTTPGenericAcc: {}", errorHTTPGenericAcc.value()); + logger.info("unknowHostAcc: {}", unknowHostAcc.value()); }); }