From 64311b8be468c46ad8d03ec206feea5a082ac00c Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Sun, 31 Jul 2022 01:03:29 +0200 Subject: [PATCH] removed unuseful accumulator --- .../orcid/SparkDownloadOrcidWorks.java | 38 +++++++++---------- .../oozie_app/workflow.xml | 4 +- .../src/test/resources/log4j.properties | 1 + 3 files changed, 22 insertions(+), 21 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java index 72122f8b93..6b9c560055 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java @@ -80,17 +80,10 @@ public class SparkDownloadOrcidWorks { LongAccumulator parsedWorksAcc = spark.sparkContext().longAccumulator("parsed_works"); LongAccumulator modifiedWorksAcc = spark.sparkContext().longAccumulator("modified_works"); LongAccumulator errorCodeFoundAcc = spark.sparkContext().longAccumulator("error_code_found"); - LongAccumulator errorLoadingJsonFoundAcc = spark - .sparkContext() - .longAccumulator("error_loading_json_found"); - LongAccumulator errorLoadingXMLFoundAcc = spark - .sparkContext() - .longAccumulator("error_loading_xml_found"); LongAccumulator errorParsingXMLFoundAcc = spark .sparkContext() .longAccumulator("error_parsing_xml_found"); LongAccumulator downloadedRecordsAcc = spark.sparkContext().longAccumulator("downloaded_records"); - LongAccumulator errorsAcc = spark.sparkContext().longAccumulator("errors"); JavaPairRDD updatedAuthorsRDD = sc .sequenceFile(workingPath + "downloads/updated_authors/*", Text.class, Text.class); @@ -107,11 +100,10 @@ public class SparkDownloadOrcidWorks { if (statusCode.equals("200")) { String compressedData = getJsonValue(jElement, "compressedData"); if (StringUtils.isEmpty(compressedData)) { - errorLoadingJsonFoundAcc.add(1); + } else { String authorSummary = ArgumentApplicationParser.decompressValue(compressedData); if (StringUtils.isEmpty(authorSummary)) { - errorLoadingXMLFoundAcc.add(1); } else { try { workIdLastModifiedDate = XMLRecordParser @@ -184,7 +176,6 @@ public class SparkDownloadOrcidWorks { } else { downloaded.setStatusCode(-4); } - errorsAcc.add(1); } long endReq = System.currentTimeMillis(); long reqTime = endReq - startReq; @@ -193,7 +184,6 @@ public class SparkDownloadOrcidWorks { } if (downloadCompleted) { downloaded.setStatusCode(200); - downloadedRecordsAcc.add(1); downloaded .setCompressedData( ArgumentApplicationParser @@ -214,9 +204,20 @@ public class SparkDownloadOrcidWorks { String works = ArgumentApplicationParser.decompressValue(compressedData); // split a single xml containing multiple works into multiple xml (a single work for each xml) - List splittedWorks = XMLRecordParser - .splitWorks(orcidId, works.getBytes(StandardCharsets.UTF_8)); - + List splittedWorks = null; + try { + splittedWorks = XMLRecordParser + .splitWorks(orcidId, works.getBytes(StandardCharsets.UTF_8)); + } catch (Throwable t) { + final DownloadedRecordData errDownloaded = new DownloadedRecordData(); + errDownloaded.setOrcidId(orcidId); + errDownloaded.setLastModifiedDate(lastModifiedDate); + errDownloaded.setStatusCode(-10); + errDownloaded.setErrorMessage(t.getMessage()); + splittedDownloadedWorks.add(errDownloaded.toTuple2()); + errorParsingXMLFoundAcc.add(1); + return splittedDownloadedWorks.iterator(); + } splittedWorks.forEach(w -> { final DownloadedRecordData downloaded = new DownloadedRecordData(); downloaded.setOrcidId(orcidId); @@ -228,10 +229,12 @@ public class SparkDownloadOrcidWorks { .setCompressedData( ArgumentApplicationParser .compressArgument(w)); - } catch (IOException e) { - throw new RuntimeException(e); + } catch (Throwable t) { + downloaded.setStatusCode(-11); + downloaded.setErrorMessage(t.getMessage()); } splittedDownloadedWorks.add(downloaded.toTuple2()); + downloadedRecordsAcc.add(1); }); return splittedDownloadedWorks.iterator(); @@ -250,11 +253,8 @@ public class SparkDownloadOrcidWorks { logger.info("parsedWorksAcc: {}", parsedWorksAcc.value()); logger.info("modifiedWorksAcc: {}", modifiedWorksAcc.value()); logger.info("errorCodeFoundAcc: {}", errorCodeFoundAcc.value()); - logger.info("errorLoadingJsonFoundAcc: {}", errorLoadingJsonFoundAcc.value()); - logger.info("errorLoadingXMLFoundAcc: {}", errorLoadingXMLFoundAcc.value()); logger.info("errorParsingXMLFoundAcc: {}", errorParsingXMLFoundAcc.value()); logger.info("downloadedRecordsAcc: {}", downloadedRecordsAcc.value()); - logger.info("errorsAcc: {}", errorsAcc.value()); }); } diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml index 43d7eb1ec8..f1195a16f9 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml @@ -78,7 +78,7 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -190,7 +190,7 @@ -odownloads/updated_works -t${token} - + diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/log4j.properties b/dhp-workflows/dhp-doiboost/src/test/resources/log4j.properties index 20f56e38dd..520bf1a18c 100644 --- a/dhp-workflows/dhp-doiboost/src/test/resources/log4j.properties +++ b/dhp-workflows/dhp-doiboost/src/test/resources/log4j.properties @@ -7,5 +7,6 @@ log4j.appender.A1=org.apache.log4j.ConsoleAppender # A1 uses PatternLayout. log4j.logger.org = ERROR log4j.logger.eu.dnetlib = DEBUG +log4j.logger.eu.dnetlib.doiboost.orcid = INFO log4j.appender.A1.layout=org.apache.log4j.PatternLayout log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n \ No newline at end of file