From 657b0208a25f9597d73ee028579ccdac80f01849 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Mon, 25 Jul 2022 12:37:39 +0200 Subject: [PATCH 1/4] multiple works download (<=100) for single request --- .../orcid/SparkDownloadOrcidWorks.java | 86 +++++++++++++---- .../doiboost/orcid/xml/XMLRecordParser.java | 92 +++++++++++++++++++ .../oozie_app/workflow.xml | 4 +- .../doiboost/orcid/OrcidClientTest.java | 8 +- .../orcid/xml/XMLRecordParserTest.java | 8 ++ .../0000-0001-7281-6306.compressed.base64 | 1 + .../0000-0003-3028-6161.compressed.base64 | 1 - .../orcid/xml/multiple_downloaded_works.xml | 57 ++++++++++++ 8 files changed, 232 insertions(+), 25 deletions(-) create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/0000-0001-7281-6306.compressed.base64 delete mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/0000-0003-3028-6161.compressed.base64 create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/multiple_downloaded_works.xml diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java index 0569bacfd..72122f8b9 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java @@ -3,6 +3,8 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.time.LocalDate; import java.time.format.DateTimeFormatter; import java.util.*; @@ -13,6 +15,7 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.api.java.function.Function; @@ -20,6 +23,7 @@ import org.apache.spark.util.LongAccumulator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.collect.Lists; import com.google.gson.JsonElement; import com.google.gson.JsonParser; @@ -42,6 +46,7 @@ public class SparkDownloadOrcidWorks { public static final String ORCID_XML_DATETIME_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"; public static final DateTimeFormatter ORCID_XML_DATETIMEFORMATTER = DateTimeFormatter .ofPattern(ORCID_XML_DATETIME_FORMAT); + public static final String DOWNLOAD_WORKS_REQUEST_SEPARATOR = ","; public static void main(String[] args) throws Exception { @@ -56,7 +61,6 @@ public class SparkDownloadOrcidWorks { .ofNullable(parser.get("isSparkSessionManaged")) .map(Boolean::valueOf) .orElse(Boolean.TRUE); - logger.info("isSparkSessionManaged: {}", isSparkSessionManaged); final String workingPath = parser.get("workingPath"); logger.info("workingPath: {}", workingPath); final String outputPath = parser.get("outputPath"); @@ -69,16 +73,12 @@ public class SparkDownloadOrcidWorks { isSparkSessionManaged, spark -> { final String lastUpdateValue = HDFSUtil.readFromTextFile(hdfsServerUri, workingPath, "last_update.txt"); - logger.info("lastUpdateValue: ", lastUpdateValue); JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); LongAccumulator updatedAuthorsAcc = spark.sparkContext().longAccumulator("updated_authors"); LongAccumulator parsedAuthorsAcc = spark.sparkContext().longAccumulator("parsed_authors"); LongAccumulator parsedWorksAcc = spark.sparkContext().longAccumulator("parsed_works"); LongAccumulator modifiedWorksAcc = spark.sparkContext().longAccumulator("modified_works"); - LongAccumulator maxModifiedWorksLimitAcc = spark - .sparkContext() - .longAccumulator("max_modified_works_limit"); LongAccumulator errorCodeFoundAcc = spark.sparkContext().longAccumulator("error_code_found"); LongAccumulator errorLoadingJsonFoundAcc = spark .sparkContext() @@ -94,7 +94,8 @@ public class SparkDownloadOrcidWorks { JavaPairRDD updatedAuthorsRDD = sc .sequenceFile(workingPath + "downloads/updated_authors/*", Text.class, Text.class); - updatedAuthorsAcc.setValue(updatedAuthorsRDD.count()); + long authorsCount = updatedAuthorsRDD.count(); + updatedAuthorsAcc.setValue(authorsCount); FlatMapFunction, String> retrieveWorkUrlFunction = data -> { String orcidId = data._1().toString(); @@ -125,22 +126,38 @@ public class SparkDownloadOrcidWorks { errorCodeFoundAcc.add(1); } parsedAuthorsAcc.add(1); + workIdLastModifiedDate.forEach((k, v) -> { parsedWorksAcc.add(1); if (isModified(orcidId, v, lastUpdateValue)) { modifiedWorksAcc.add(1); - workIds.add(orcidId.concat("/work/").concat(k)); + workIds.add(k); } }); - if (workIdLastModifiedDate.size() > 50) { - maxModifiedWorksLimitAcc.add(1); + if (workIds.isEmpty()) { + return new ArrayList().iterator(); } - return workIds.iterator(); + List worksDownloadUrls = new ArrayList<>(); + + // Creation of url for reading multiple works (up to 100) with ORCID API + // see this https://github.com/ORCID/ORCID-Source/blob/development/orcid-api-web/tutorial/works.md + + List> partitionedWorks = Lists.partition(workIds, 100); + partitionedWorks.stream().forEach(p -> { + String worksDownloadUrl = orcidId.concat("/works/"); + final StringBuffer buffer = new StringBuffer(worksDownloadUrl); + p.forEach(id -> { + buffer.append(id).append(DOWNLOAD_WORKS_REQUEST_SEPARATOR); + }); + String finalUrl = buffer.substring(0, buffer.lastIndexOf(DOWNLOAD_WORKS_REQUEST_SEPARATOR)); + worksDownloadUrls.add(finalUrl); + }); + return worksDownloadUrls.iterator(); }; - Function> downloadWorkFunction = data -> { - String relativeWorkUrl = data; - String orcidId = relativeWorkUrl.split("/")[0]; + Function> downloadWorksFunction = data -> { + String relativeWorksUrl = data; + String orcidId = relativeWorksUrl.split("/")[0]; final DownloadedRecordData downloaded = new DownloadedRecordData(); downloaded.setOrcidId(orcidId); downloaded.setLastModifiedDate(lastUpdateValue); @@ -149,7 +166,7 @@ public class SparkDownloadOrcidWorks { httpConnector.setAuthMethod(MultiAttemptsHttpConnector.BEARER); httpConnector.setAcceptHeaderValue("application/vnd.orcid+xml"); httpConnector.setAuthToken(token); - String apiUrl = "https://api.orcid.org/v3.0/" + relativeWorkUrl; + String apiUrl = "https://api.orcid.org/v3.0/" + relativeWorksUrl; DownloadsReport report = new DownloadsReport(); long startReq = System.currentTimeMillis(); boolean downloadCompleted = false; @@ -185,18 +202,53 @@ public class SparkDownloadOrcidWorks { return downloaded.toTuple2(); }; + FlatMapFunction, Tuple2> splitWorksFunction = data -> { + List> splittedDownloadedWorks = new ArrayList<>(); + String jsonData = data._2().toString(); + JsonElement jElement = new JsonParser().parse(jsonData); + String orcidId = data._1().toString(); + String statusCode = getJsonValue(jElement, "statusCode"); + String lastModifiedDate = getJsonValue(jElement, "lastModifiedDate"); + String compressedData = getJsonValue(jElement, "compressedData"); + String errorMessage = getJsonValue(jElement, "errorMessage"); + String works = ArgumentApplicationParser.decompressValue(compressedData); + + // split a single xml containing multiple works into multiple xml (a single work for each xml) + List splittedWorks = XMLRecordParser + .splitWorks(orcidId, works.getBytes(StandardCharsets.UTF_8)); + + splittedWorks.forEach(w -> { + final DownloadedRecordData downloaded = new DownloadedRecordData(); + downloaded.setOrcidId(orcidId); + downloaded.setLastModifiedDate(lastModifiedDate); + downloaded.setStatusCode(Integer.parseInt(statusCode)); + downloaded.setErrorMessage(errorMessage); + try { + downloaded + .setCompressedData( + ArgumentApplicationParser + .compressArgument(w)); + } catch (IOException e) { + throw new RuntimeException(e); + } + splittedDownloadedWorks.add(downloaded.toTuple2()); + }); + + return splittedDownloadedWorks.iterator(); + }; + updatedAuthorsRDD .flatMap(retrieveWorkUrlFunction) .repartition(100) - .map(downloadWorkFunction) - .mapToPair(t -> new Tuple2<>(new Text(t._1()), new Text(t._2()))) + .map(downloadWorksFunction) + .flatMap(splitWorksFunction) + .mapToPair(w -> new Tuple2<>(new Text(w._1()), new Text(w._2()))) .saveAsTextFile(workingPath.concat(outputPath), GzipCodec.class); logger.info("updatedAuthorsAcc: {}", updatedAuthorsAcc.value()); logger.info("parsedAuthorsAcc: {}", parsedAuthorsAcc.value()); logger.info("parsedWorksAcc: {}", parsedWorksAcc.value()); logger.info("modifiedWorksAcc: {}", modifiedWorksAcc.value()); - logger.info("maxModifiedWorksLimitAcc: {}", maxModifiedWorksLimitAcc.value()); logger.info("errorCodeFoundAcc: {}", errorCodeFoundAcc.value()); logger.info("errorLoadingJsonFoundAcc: {}", errorLoadingJsonFoundAcc.value()); logger.info("errorLoadingXMLFoundAcc: {}", errorLoadingXMLFoundAcc.value()); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java index e8745aa96..c1375ee2a 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java @@ -1,7 +1,11 @@ package eu.dnetlib.doiboost.orcid.xml; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.*; +import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; import org.mortbay.log.Log; @@ -34,6 +38,33 @@ public class XMLRecordParser { private static final String NS_WORK_URL = "http://www.orcid.org/ns/work"; private static final String NS_HISTORY = "history"; private static final String NS_HISTORY_URL = "http://www.orcid.org/ns/history"; + private static final String NS_BULK_URL = "http://www.orcid.org/ns/bulk"; + private static final String NS_BULK = "bulk"; + + private static final String namespaceList = " xmlns:internal=\"http://www.orcid.org/ns/internal\"\n" + + " xmlns:education=\"http://www.orcid.org/ns/education\"\n" + + " xmlns:distinction=\"http://www.orcid.org/ns/distinction\"\n" + + " xmlns:deprecated=\"http://www.orcid.org/ns/deprecated\"\n" + + " xmlns:other-name=\"http://www.orcid.org/ns/other-name\"\n" + + " xmlns:membership=\"http://www.orcid.org/ns/membership\"\n" + + " xmlns:error=\"http://www.orcid.org/ns/error\" xmlns:common=\"http://www.orcid.org/ns/common\"\n" + + " xmlns:record=\"http://www.orcid.org/ns/record\"\n" + + " xmlns:personal-details=\"http://www.orcid.org/ns/personal-details\"\n" + + " xmlns:keyword=\"http://www.orcid.org/ns/keyword\" xmlns:email=\"http://www.orcid.org/ns/email\"\n" + + " xmlns:external-identifier=\"http://www.orcid.org/ns/external-identifier\"\n" + + " xmlns:funding=\"http://www.orcid.org/ns/funding\"\n" + + " xmlns:preferences=\"http://www.orcid.org/ns/preferences\"\n" + + " xmlns:address=\"http://www.orcid.org/ns/address\"\n" + + " xmlns:invited-position=\"http://www.orcid.org/ns/invited-position\"\n" + + " xmlns:work=\"http://www.orcid.org/ns/work\" xmlns:history=\"http://www.orcid.org/ns/history\"\n" + + " xmlns:employment=\"http://www.orcid.org/ns/employment\"\n" + + " xmlns:qualification=\"http://www.orcid.org/ns/qualification\"\n" + + " xmlns:service=\"http://www.orcid.org/ns/service\" xmlns:person=\"http://www.orcid.org/ns/person\"\n" + + " xmlns:activities=\"http://www.orcid.org/ns/activities\"\n" + + " xmlns:researcher-url=\"http://www.orcid.org/ns/researcher-url\"\n" + + " xmlns:peer-review=\"http://www.orcid.org/ns/peer-review\"\n" + + " xmlns:bulk=\"http://www.orcid.org/ns/bulk\"\n" + + " xmlns:research-resource=\"http://www.orcid.org/ns/research-resource\""; private static final String NS_ERROR = "error"; @@ -307,4 +338,65 @@ public class XMLRecordParser { } return authorHistory; } + + public static List splitWorks(String orcidId, byte[] bytes) + throws ParseException, XPathParseException, NavException, XPathEvalException, VtdException, ModifyException, + IOException, TranscodeException { + + final VTDGen vg = new VTDGen(); + vg.setDoc(bytes); + vg.parse(true); + final VTDNav vn = vg.getNav(); + final AutoPilot ap = new AutoPilot(vn); + ap.declareXPathNameSpace(NS_COMMON, NS_COMMON_URL); + ap.declareXPathNameSpace(NS_WORK, NS_WORK_URL); + ap.declareXPathNameSpace(NS_ERROR, NS_ERROR_URL); + ap.declareXPathNameSpace(NS_BULK, NS_BULK_URL); + + List works = new ArrayList<>(); + try { + ap.selectXPath("//work:work"); + while (ap.evalXPath() != -1) { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + long l = vn.getElementFragment(); + String xmlHeader = ""; + bos.write(xmlHeader.getBytes(StandardCharsets.UTF_8)); + bos.write(vn.getXML().getBytes(), (int) l, (int) (l >> 32)); + works.add(bos.toString()); + bos.close(); + } + } catch (Exception e) { + throw new VtdException(e); + } + + List vgModifiers = Arrays.asList(new VTDGen()); + List xmModifiers = Arrays.asList(new XMLModifier()); + List buffer = Arrays.asList(new ByteArrayOutputStream()); + List updatedWorks = works.stream().map(work -> { + vgModifiers.get(0).setDoc(work.getBytes()); + try { + vgModifiers.get(0).parse(false); + final VTDNav vnModifier = vgModifiers.get(0).getNav(); + xmModifiers.get(0).bind(vnModifier); + vnModifier.toElement(VTDNav.ROOT); + int attr = vnModifier.getAttrVal("put-code"); + if (attr > -1) { + xmModifiers + .get(0) + .insertAttribute( + " path=\"/" + orcidId + "/work/" + vnModifier.toNormalizedString(attr) + "\"" + + " " + namespaceList); + } + buffer.set(0, new ByteArrayOutputStream()); + xmModifiers.get(0).output(buffer.get(0)); + buffer.get(0).close(); + return buffer.get(0).toString(); + } catch (NavException | ModifyException | IOException | TranscodeException | ParseException e) { + e.printStackTrace(); + throw new RuntimeException(e); + } + }).collect(Collectors.toList()); + + return updatedWorks; + } } diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml index f1195a16f..43d7eb1ec 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml @@ -78,7 +78,7 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -190,7 +190,7 @@ -odownloads/updated_works -t${token} - + diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java index d4079058e..70bbd066a 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java @@ -161,13 +161,11 @@ public class OrcidClientTest { @Test @Disabled - void testReadBase64CompressedRecord() throws Exception { + void testReadBase64CompressedWork() throws Exception { final String base64CompressedRecord = IOUtils - .toString(getClass().getResourceAsStream("0000-0003-3028-6161.compressed.base64")); + .toString(getClass().getResourceAsStream("0000-0001-7281-6306.compressed.base64")); final String recordFromSeqFile = ArgumentApplicationParser.decompressValue(base64CompressedRecord); logToFile(testPath, "\n\ndownloaded \n\n" + recordFromSeqFile); - final String downloadedRecord = testDownloadRecord("0000-0003-3028-6161", REQUEST_TYPE_RECORD); - assertEquals(recordFromSeqFile, downloadedRecord); } @Test @@ -337,7 +335,7 @@ public class OrcidClientTest { @Ignore void testUpdatedRecord() throws Exception { final String base64CompressedRecord = IOUtils - .toString(getClass().getResourceAsStream("0000-0003-3028-6161.compressed.base64")); + .toString(getClass().getResourceAsStream("0000-0001-7281-6306.compressed.base64")); final String record = ArgumentApplicationParser.decompressValue(base64CompressedRecord); logToFile(testPath, "\n\nrecord updated \n\n" + record); } diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java index 78760fa96..cb6713009 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java @@ -108,4 +108,12 @@ public class XMLRecordParserTest { work.setBase64CompressData(ArgumentApplicationParser.compressArgument(xml)); OrcidClientTest.logToFile(testPath, JsonWriter.create(work)); } + + @Test + void testWorksSplit() throws Exception { + String xml = IOUtils + .toString( + this.getClass().getResourceAsStream("multiple_downloaded_works.xml")); + XMLRecordParser.splitWorks("0000-0001-7291-3210", xml.getBytes()); + } } diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/0000-0001-7281-6306.compressed.base64 b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/0000-0001-7281-6306.compressed.base64 new file mode 100644 index 000000000..07b2d46a2 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/0000-0001-7281-6306.compressed.base64 @@ -0,0 +1 @@ +H4sIAAAAAAAAAN1Y23LbNhB971dg+NAnkiKpSJZUS2luTerESSd22pm+QSQkISEBFiAlqxn9exe8CaRExkmc4Uw9Y9rEnrO72MUCC14+votCtCVCUs7mhms7BiLM5wFl67nx4fY3a2IgmWAW4JAzMjf2RBqPF5c7Lj7N1APFONnMjYEDPxb8utaFN3Gt8dAZD5R8MHWHk+nF1DUQAlNMzihLiGA4nBubJIlng8Fut7O58GkAz/WAyUGJMH5CqGSRIPVxkjnZRqsgOi+gMqHM72ZqoBqXxIKAShJ0UCuMzuTJhgiL4Yi0M48YnRmRaAnZ2NC4nXnE1CIkBBcd0VFio8D6PIq6ApLLde0wSS464pDLdUYMLnLIohWQBNNQtnObSF3LJ7LfdRouAOXMSAQaOqKgxLWo3eVrzaIBYQldUdIVw1OwrmuVsrxu2vgFoBYlQVZEQMmRrgAdQToXB4EgsoNXAHQOZVsKi9WKuaTdRdFE6lpUZbczlbTMxwZKi4t9O7gA1HISxSHfRxDirkSWGJ35T4pDSMuXdooaTOdLIrbU7yjaAmDU1viXVnYtZ7DLQFxpV7qPmHoFSoKFrzaNVHQs8TquXpEwJsiWkl2XyxVI5y7TsCPjSnrOV1AkeSq6InoCNVCcJhYcQUA6Hh5bKumShjSBpRSny5D6xiIzqH4u8/1q5guidmIrgOfCczzXcoaWN711vdnQm7mPbGfs/X05OIc+0RVimVgRHIRQ5UeNnuWMLce9dUDdaOY59tgdHjWe4ZzozSd5HD+VWX5IYV3DJlNH6chU0IWKqISQHsOZE6uz2LNG04lnTaaTYeWiIrZqVWf5ooudAVrpGy6TReVNRcqG6/Md3GvCjbCoo3Jx4/M4lchCL0KpFqlo6spQZ9VgCdWrKt7igq6p+uN/fYzPNDrfENxz7IcO7n3m2xqbLIxXXG5SjJ7idL1pV1uPeCMfmiDrGROahC35yUXPOHR/UcwFFnskU9hutziEnjSIOfSFcoaeMFQ0iMoJkEG5rVJJ1KigTFIfxaCDMoLWIeURRoKs4ZBR6pI02FcONly5HJxzMPf6I8xFnfu58C1JBbfeQZsc8vW+4NUhDb5Pk8zbxsRrMivZx2SxpMuE3BU666IuLsQoJYtfMSTGD8nnLGOe416YmTtojj7/8LgezCIEylo9RAdzD3u8Glc+HcwtD9Mo88qdHkyWqnZWvcFLjNdEZhLvYmq53sQ5mDhNNlzkk4BLyN5EtzaCKwl6gxkx0ZP85SlMnoTSRB+Kd56uViQx0Yv8/SUPgwgzE90UZHBpr95e2MXIb1yQDPHWfp2P/IH9T0SY6L19VSgVnFHpq7HC7DWEB6Ztoiu7MHSzoRsTPbOtQu2zDUDwOo1iHGITXeejr6COcBhWc3nJkwSLgCvrL/Oh5xseYkGB86rg8NUqc/BNqRln4XhaRgCyrhzJ2RzeMvT7asJ+Ji7YVxBLqch/ltNPQxzQysO/sICe00Svy4ldc/aRKPHh0Fyg+fpr1tLpsi82AbWcy4Ip1mxZfrWVXu2d2Ymfm6ofqzpKLbKFWmFViWcjp1tTu7pSldbpy/PGNET7pq2B8hoOOK28OBHeS00eadexXWc6HDCScuYPGL9znYuzmhuZ6VLNuIigMf6XBCgRGCo+68ATkRLjKwwetdzPqiBhlgl1n11IEq7Oaq2hzp93rRn5vpQRGjxIyjxLerZjTUbO0L2YjkfjRz8yX/e09n9LFpWSPUyBjbzhaDIeI/jHm4zcH1tcYMxS1h4+RzFsrxZ/2DSdk8rTPRRunwvt1iezzt0G4YCyHRx1xTcjG3CPocjmp0v2ZxzFv6gZMCJ+fz6/fju5fffk/Y3Wb4cnnRZX3coyTbhobtxN+Zlo5hBBAprkbe2x4SiPNE3YCFm3/m8yXzY4vRjXGqp+7B8buF7saw1jP8nXG9RePKg1xL14oDfg/SxCveHvxYPaBaMXD7QLTS/2Ty5QvXihXdh62o70C2IvLugX0n5ycLwA97QSywt3TydyccHvJ/vaB4W+DsTyA0Yv9rUPJj0dx9UHml7s6x+E+jkKyw9Q32P9VFZcFAqBeiz+A4MY5OQYIQAA \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/0000-0003-3028-6161.compressed.base64 b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/0000-0003-3028-6161.compressed.base64 deleted file mode 100644 index 34de6ba16..000000000 --- a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/0000-0003-3028-6161.compressed.base64 +++ /dev/null @@ -1 +0,0 @@ -H4sIAAAAAAAAAO19S4/jRpbuvn9FoIC+0w0U3w+RuuU00plluxouV05llu9gNokQGSnRphgaksqyauX5FzPADDCz7IUXc3txgd4M0LmdX+Ffck8ESYnKFClKKbL0CMNVlSmdiBOMYMT5zonzePXlz+MQ3ZM4CWj0xQtNVl8gEnnUD6LhFy8+3HwtOS9QkuLIxyGNyBcvZiR58eXZ717FxKOx38/+QROcjr54oajwnwR/DMk0TFcy7N4/vEDAIEr6QZSSOMLhFy9GaTrpK8rHjx9lGnuBD38PlShRCoqiBfGnHk75sKqazEmKNn6QpEHk1bcqEc3bkQk8CU6JX9NsTlO0oumIxFKEx6S61YKmaDUm4wHM9yiYVLda0MxnI45pXDMT7OuC1qPjcd0EZN8X1NkSVlNn3xfUExgWhVWSfJLiIEyq2z2mLHr4icw+1jLMCeZPPobWNU/Ovp7T/py9Q1LgkygN7gJSN2dPiYt+7qZRtgOq2uYE81mJyR2JYeOQuglZEBXtsO/HJKlpkxMsdtF9AC+gNKFJUP+SP6YseoCZ/am6Ffu2oBzBNqHxrJo4J1is0ySkszFMZd1iFTRFq3+a4hCmft1OXyIr2iYkvg+8ms2XEyy/u+ve2PnawAkBcxjULemCZrGbEoJjj236aVzz2i7TLUYIv8fkPiAf64Y5JyraDaZhzaqybx+PDzpI6DSum70npC/Ofofgv1fZ+dHnxKXNk31bppjGwRnrPYHuF12vkBKvlFKLJ90w6XJW14oTPGk2okl6Nmc7J+YfZw+i1D7Jq9KGLe/wEqcyRUg9HJIzEr1SVnycM6zt8lW+o4qtV2JUfOPFhG8BaUzSEfXPLgM4ntNXStX3T3tIpoNxkDBxL/kgzM50VbMkTZNU+0Zz+6re10zZ7un/uOjzcYsnEx3iJJXGgBlg7vyiV11lveo661WDXh1Z1+1/nE/5ijYrnjbEwZj4Z2k8JaVnzD99Sg84JuuPC4VHzR59WdN6EgdjHM/qe1kmypd35QK+ys6V/PipwUrFAVR+wbKmDD+g+yAJBkEYpHAmT6aDMPBe5J2twl2LTspLxd+PxSJptqSakmbfqCpbJN2Ue5q6WKQl6pX9rVz6Nb3WLX3poRfIoT8M7knEQVRydhHiqR/QV0/wxRLVmg7v8DgIZ5z27G0Q4mhVf2WixYoopSUpfbzAeSVYmNQtdonshVLqaVkyPBIotT0+Il3qlb+mGZaq7SODU+WWOQQpsEr9QxV4pdxBjucK4FfbvqBZHvtToLYK6dU/1wr6gsl8TbN/8g8Xsr0EBaRkOmbbvnYOFqBg47PSgYOyb2iyrVsNz8rSMEuKTe1clOmWl3rR11y1qp/VOVVlP3PEt+a1m5NVnFw1E6eZNxqcNHbfNGXL6G1w0pTX+O4OztZMeA5jOp0skzYcitpjQ1HdvuXKtqM3H0q5/9K7migryBaTVZrexZs5TSVQ4AHYaYam9XTTeoFgxSchnklB5JOf4YsXzdZCWfSwQvg8HVn5IR5JGjY9qmRoNzpgAaOv9WQQE00kTVeTX+aRwd3VNE/pJA4fq6nLLZ6JiOu63hAl13W1FjmvbKxsNCuP5pCL01y2o0IkryBZvW5Kg4UrGPpkguOUbxne4zX1AhwiHPnoYhqm0xh+OY/SUUwnNKTDGbqcN1gwetxJLceYhkRKgxTUgHP/xymcvOgqpncgJ2k877FEVP9upsC45h0u084ADjA85s658E/WtoM/7F1aNMs+WNvOx7MzVVtMEp6tW681D7NQNYc4Cj7x03n9MPiKnE9TGtExnSboQxRwC2c6Q/QOfQUQiYSA9OajqF7Bcq85tGm2gzxgdvaUE/+4WQd0GqWgQby+XjTOP1q/A9cOdb5gQYLHg2A45Ydvs1lu1r6sTQ9jOEh6qqPKd4t3o1HLTQfBmudnwDfv31yu4lYiWT+Rm85PyaJQR/pKqRXhj0CK0hyl7B7QbI2tyv3vCtA4lgly3d0ez8w7eC6c0VxJdSTdudEspuIatuzou4Yzz5r6Mg8BZ44YzpRwwxxUcDDzfm4NQBLK7ITwzZvLNzcf3qML2A0xQXfwSUH3EohAvR8HET8zeB9voojeZ78GEbqB4QTJGHWCWewWMUsX+ONdkpKQYHTtjSgNGfAopu9/4fHkf6NvaTIJUsyOoBMFIp9TWC7RlYwPq40Y+W1frQWjoKkyhDy+kKvt7AlxVa+Li+La/kpkVT2VbpVquyrTVfW1dFtX29syZVV/T26iGplBF9RV/eb3g7W9FTRVfbAr09oOOMHmRq2trIGPB/cM3MehjQXgw5VVtyVDFpsaPoGrsJ5h6z0YTS20Y22VBeWqG5InUHFjsxVMRu9G1/qq0Vdd2XJ6u8Z5z5nqMg+B844R5/FNUgOyyqPKyF6HsGcmJEQ+QSFGxGOGkIdfMWA4AHxjHCISoZDCqRXTGYOFCcJwUgBd+PCXYeDB7/40xlFKEHRy8e6HN5dSyYZUB/iUdaNddTSsf64SdbNXodRASmcTcgbQNpo/wZNvN+7zHodTcqZqqin1NMdY2XNGs3HXEV+i4BPxUQqLkATco4bdQr9Yw3DRcnOu0zic7/AJjQGYymzK+D4v5Kjy5vr6e6V+DKyfjZnHJMwAAACUM2ZYlejdyt6XCNdv6bWvzSrKKrSavdnsbfkRZoPRwkADLyTFS1/5Is2POi6SMjy6kTKkqw2UocWB2YhN9jzFo2Q79lsaB59AbQJEdA4InxnAiwMhf8hl+hVY/YlIr8HnawxYewReGp9lGx1l2dnj02CLg6n6UNJUWetZtqbEmMj3mitr2jbH06ZHU1O2TQ6pdQcUTBk/mJryrD2U1h5ICQm3PY2anUQNT6E1cFmzerZjNYHLBWVbcFl1b3S1rzt905AtS8BlAZf3Gi5/BxA5jegwxncMJMN3FI1IHOMxO98wQ9EBt4N6wcNfIuRTDz4mMrqKgzGJGYzGCYPMBE0jhrHvSZIGw5w6oYOYoJRb/iiaIeoF9LCR9HbyqqrLncqsKiZdya0q/q3JriqGu5JfvP/GaLpiNPuksYHocyRTd/TOXq9aht1obPVjOF6NbS5bs4lim+9nucn+q5ySz6kEbuvFs7hIq7kRK7djbju6tYEXz/YKZ6Fl0iGTvK9/nhAmU0HohkLfrF6g/dM3HU3TFLfnuLpjmrpq6668lU1sG+HdmPeuNc/GjE9C/bQNw3XNRrc1OWVrtzUOCzwx1L6pyj1TqJ9C/dxr9fO3X/7lAicEJenUnyGjv3BQfYmuJziIfvvlX4XG2KbUqeLUqeSpGkS70qeK63EokIPdK5ClGTc7e9nWM93VG8aCwOEYCX2A8/xVY5OorOd/wkpk7UZsokkOKP1J8kZ4AmPZXzVy8UwbqJFqN2okiNB39yROMwdLkJdfog8AIGOeuCaIhtyX9S2O8JD98iEe4IUz6zcx/ZiO0IDMKBBdEZC9k8xt7QcaTscE6X3E5fM1yGdQE09bMd3Ci8vQVdNophdklG16cRl9zYT5kB3HEXqB0Av2WS84R/DS0Oz2KY2pP/X4jVIfBWM8JBGaIcLtZSTyAsw8ukhY3DO1py6s2P9l0pKUZEiCX4zRKAwiwl+1q+yE92miZF8RP0ixcnX9mv2rG/LEvzt00bkDcXZJEEwRonEQwuHRbMEPSCjpGncP0PqWJeuq+3mEkmZoDY1VGWWbQkmHmejrIJR2bqx61lSXeQihJIQSF0p3sO14NhfEEDcqwW44r9CfpkmK0hFBXxGA2hGg7S8P23TVlg1B6zm24bqO3qUNYQ3T1m0Ia/ifjA2hmCQPDzIzAoMxyYhO+A8VM3W6cKh0xvTR6589noSKR6t6zFeZZRbi+j3ByTQmCY9UjfE9yXJ65Hr+QSGkk79PVh1V0Uxbs23HkdmbJmtWT1PdXleXypsNYNub5W7veXe35CTwd7rkupTosio5lmrrhmFomtXFOjfkehiLu5tLfNXWDKdRyGVB2UAvUrfyIc9yeal9y5Rd91gu8b2QvYCN4ehq3SjrZL44umQBUJAc1zGeqSKt6qljFanBBK1Sk649OpkmSEKvw4SFpsfNNaVylxiQRcwRCo0DUGCEHps1Vracn8p5rdNtVxN3quV+ReBXdlXk8TQpfVSouHCseWSMfyrulJIJ/M7CaP2pV2RJST9SFJFgOBrQeESpzwHp0wwcB6kOt+XJsVO8V8Wue8xXNZJj8K7YDgNWdblLHFjFoyMsWMX+GBa9FXOYZvZUzTBVVGy+dhZd4jVSni59bZdL3bL2HCqfOWopzWXp8w37GpMkwUOQhDiKaIrmY0WLFIiIPzdiE9+vmail7uonqf69zmbpNOxwuRku4eBRhi+VIPqIYz8veSMD3ZdwzH3x9HDgKbzYU0YkfnP5xdvvnZt35++vS9DuUANIdmCvm3vWEBZTORkJp5kt7idNVe81u5/MKFt2prdYFQfT2HmKS+E0sw963fHcT14EUx/7JEGzecz1lLn70T76239/iDBi4ieLzGa7IgEIAEfGICQt3lM29JkpboVoljiSy6MkSAmrPHeHp2Gq3AUh/Mb3jmopb3gyJ3KbP+fvdTV70o2daGLCwkePR/5UZ95kKvtS3k0hlVbMZp3XTE9vlGGkoGzXlVN1+oYqpJKQSnsvlUJ8z7OMxhhNSJTguOS7l3vyvWW+kKB8PfxnRPbGgROHA5xMs5crP5A9OJAVEEOOoxA4M7NnkEgkhVgaEhbMDQephyUfvh3nzySVH+l0HRleL615iFE2Xw+/sglDMGFovPwSCPG0Yk5r8sW6PVdtli82o2zXqVM1+pYmO6ouxJMQT/ssnrbxN98P+dR6gAE/j32JyaPjkUNX59fvrl+i9wTOvyy/2c08P9kVTuMAuofXs6iGdVBySNdY2nTDZIejbmufSU3StaZqEqdsObhAY5PR23kixmdNdZmHkENCDjGyN3mcGzvS+mjJ7fc8Qq/vaXjPHC+uRiSiY/gTtSeGDvYyVcQWiNgCEVvQ3FYsYgs+C0Cyes0Ku+SULduR1b7Vk3WhqAuAtN8A6TuMvCnLLs3uL0E3e/g1YabDPtPMs8OBKXHzL5g+F+JMZ095la08gzXh1V/G9D7gKa7h54QXQWaOGgJRPUVUkmNKruM4kqubkt0hqnJMxha4rmbaOqpaw/94UdWx456bx2fCMKQDHMosAVIyIewswQny4uxuImHWv0NMNXFQgZQtRtWZPcOFfzuOqqvjehhRdbtb3LaiZHXNsh1Ds4uABde1VbOLdd58AIex5DsMpKwvS14OpNS6CaTsudqRqDgikPJxYxFIub7rvVFJTyWQspweqGQwm4zoIAC19Rz9icFIMkM3I8B4wxH6Gn5Hl8TjPrwlz81Lck9COmG1IV6iK3jMqAjB/I56OEQXNPLgtD5w/bXN8MqdAYUqdt2DhaqRHEOkXcvhlc9QCKp4dKQUVLE/hkVvxX6V7TndRMXmE+GVq8MrayZKhFd2E16ZHw4ivHLpeVaHVy6hoBI+Ega66qXaTxuO6tq2azl6jopsS1WtTm04zQdwGDac3S15ezbZnuFopt3rJKNdQ66Hsbg7NNA1KktQUDYw0OnPM9CxHM0i05kw0FVMqTDQCQNd2wY6nwx5PZw+wrlPCHMTIWPuCTeZ3w0TYWdrHcxVsese0FWN5BhMLu3a2Z4D8qp4dAT0qtgfw6K3k8asZ/Z6tmmjYvMJO1tFGrPqiRJ2tk7sbMXhIOxsS8+zbDL7U/Ybu4W8niYpDiI8CMmOff4XzCs0VVtzdceyG9QqWlDu3pVEcyXVZQF/aq9v6X3DYGUvhaZaqalqzF3YlDRXe66muqqnw9BUL2KaJDG5e9ydUHKEkrOW3f4oOVX1tzcbwQEV4X4GEtl8jg4XYZTbjXlFZG0BZMcblEhetGqzRHInaKbTFA9G39RkTbP2+85wS1WzNhxpEcG7Wg3a8e3RbqK7t45Bek5k97HcVtm603MaprbKKNuMmNX6ltpXLdntme2kFNlua5d5iIhZETG7w/j+Q9YD2k8zspUUqur/1POMHArOb5pkRNkI6x9lvrWL86/eHBTCFl55TxVXq8fSkXxGr7yaARyG49bulrzVSGmr1ysdDJ145dVyPYzF3Z1XXs9t6pXHKLsIm9VcV9x1CK+81VMqvPKEV96utdZyvRL2xTSC8w2Bfhqw19MjvMzoiKDzKB3FdEI9EpE+wh4rVzqF4zwaopSMJzRmBt+FBnzYemyH91nbY70qdt3jvaqRHIP/VuvBsVtjwCoeHeHAKvbHsOjCaU847TV4u4/Waa84HITT3tLzHO01914k6jVNtZE6XlC2WYZU68P/hitbR+N6KNQ73vj4rh2vMauc4j3J0zuN0CSmcCKNcZ5xE8EL7DFI0KK34QYFSEGr9GgEKmPCT9+sCGn+MHAY56mEPSxNI6l4Eil7Emn+JJKmqr2es5FEosC5i/yqTgM5VG435p5c6iKOZtzctctY4LQ2XbtuRoTlv5ovmpByKyetRspZltmw2HZG2bKUc/qqKttl9xch5YSU2z8pV/arDxI0ANEx5glp0Bj/COpukCSgPt7BTx4/JhD2WEwAN10ymbEfIo+7ehAY5XTM3zZW+87HCpMVitpTSvZTCURdTD/CIxYiLxeBJy/pdLUTSfd/mEcOeg0IBV40D33N1kxIu5UTV6fT6VajxCcFZZvSjt2v9jVH7lmiiLeQdoci7VjgWC4JCjXusSTYNyWuiRhjepuquycvzUrMutTb0GmLssZbYaPLptZ8BHVLMy3lx6wEMbuydWTNllVTW8lo9y6Cm/HfRYROKbJvM+YnEaKjWY5hNqr4UFC2hatU90ZX+4bbNw3ZOpqKDwJX8cZHjateojErQpfiuIjLnxVlp+5YxQAZfWDV6wbwbiEWvxNzVBP4Ldaj78KzoSXPpp1KqCpunUupqoG0L6mqOB+Hc03SgnON7VpST9O7e+FqGe7q7ZrQOMWhzKaMv2UxyU5E5c319fdK/RiOt2LiIz2U7cKf5Y024n46rrSpeO5Aibw6v353LaP3BNBkillp15tMYILovMJpHED3IOwvpiG7xA2Film9cvuX6IHVndVsR3ckgJmdZITnNV85S9PdbQb4hvVma3mfhCJpG6pj280M9Bllm7ke9L6h9zVWHV0okkKR3GtF8iKY+phld0M/YI/7K5FEZhbWO9gRrMj5NB7AQHkxY5JMGEVSKpZ+4LXj2qx9/jwZVMWifTlUxbl1WVTF+CAzPUxoKIUkkmGClDAMBqDvKB7faz65n+805cv7L1TT09w7rJFeT98I1neb9qEJpN8FNKfh3/4fTBx67QfZHJ02+t7GV0HXjYZQKKNsEwoZfd1kaa9UWxdQSEChfYZCb5ht3J96mQc6RiFG3hwd3ZfQ0flkGqUkQQmFY72wtaPZEzs8nGJfYRhhSCMscJLASV3ipAOxeHYElRhGkrwRnqR743ezA6x0seJ06rM7vsa62ykjq0Oza7pwXNimqVpb1SLe4oxcw7BVm+Ya3idi0zRVp5FzTEHZrtOxqffVHsu6IYC8APL7DOTfkwTErjfiUTQ378+/RxfwdZCmhCAJLbkkB2MQiynPGTShCeyekMAPsHF45qAJHAUECei+e3FU1X37IqmKc+tiqYrxQZo32QyRaUxBzQjzH+Af5T0ZXuIUKySd+gDZr28+XGYxYbbuao6pvLl6990tfPoH9uEfsw9vX38vT/y7jfB8TJg/yfEg+ddsBgmO0BXMZ4BZUZ2X6Co7hy4J0+XYR/w8u4bN4HG3BH5qXdARSZgfPKcOhLl0c5TV07RmVQJyynYDmS2T1bTWVBHaJVDWXqOsLF3HJ2Z1QPMUF4iwqmABO60C2BQswitlNlH4nGViTFISfcII53ZRYRitNow6kms6titZjtRZUkVmmHQ4W8v5TKUC1vA/XsPovrtr7gDlnEc4+om8RNcTwu5HiIAqKyetrqCRpjYtaMQp2y1opNt91ZR7Rk9AFQFV9hqqMG0RzauZJgBA/Di4J3GWZyV38+b6FOzPOEhpzO5vs6Se6awP2OUuZOVNQTQgmqWRHpN0RH0a0iEchtCW1z9NWA5FzKBOZoAKeOqyIfx+8I5yLYS66IZjS47VU7vCN/UMuwl1qR/D8eKbR0akHAMGP9JknOapXEEeyZPRRMk+VHL8o9wH5KNiGHbpKD3UmJcdgKg3EZ9q1hKOnXLqVp5HEESjh97iCA95aeblimwCbq2Y3roUd46rN0xxxynbtQxpet9wZMs0BNwScGuf4dYNgXc9AhWPe57cB/hHkqAZSlbmd20PF22Yzg5WiYQEZ9lbg5Qkik/uMGBD5S4I4Te+XVRLWTzd73WVRPDXD/wJ4YcZ/Cmy2P7+wvj9Vwb7+mYas1/OL/nz3r7L+Zz6nUc+DyC3RpRyCVau4PItTSYBwCg4S09bbjV+3zdSA9ouCWe5PU3VXGcbZL8xqm/G9ZRKwtmGDnKsYQwAp2w3BsDgJeFsa9elrwV0EdBlt9BlxC6qguieqVI0YlLJi/EA3QWwsVmWXmY7Cimz+IxIDAJqSBZ2pRSOJm4kYnaf70FYjwi8hugrdksWHrb9p6VsO6ahm+zWB7az0QPk4XSZZqch852VkOrSMlMxoC1Xv93SYc/BClU8OsILVewPzqdr88pKxdztorLSkYZnMITDhMa3haTg/qg7NcYteFdiQV70t1E8aEHZfnlgQ+7px1KpQZQHftxY2XiCVkFDUR5YlAcWyH7NswlkL5C9QPYC2T8d9P4ge4Hol0Yhrikq/vsM1xS22jMNV+v4mqKW62FcU+xucVuoMqE5hqUrJA6xl8iaamhbOdBvgTsaMj6MJd7NTRS3KTiNrQ9OF9YHU1atY/FZFtaHx42F9WF918L60LX1oajVAWdNiFhEXEqQT+5JSCdzP02fTAhOcJQGeW2rIELfMcCJzscAONv0lTpgS8NupH0Vm+4kftUIjqFSRrtmhueg+CoeHSH5KvbHsOitxMZqTs81e5aKAGSZMP8tLbpE4hhU+ydLX9vlUresPUfBZ46qluyPi8837GtMkgQPQcjhKKIpmo81D1MCeBkj/tyITXy/ZqKWuqufpPr3OpulAzKedmpHKw6HXdjRjjacZZ75hBVjIR/ZtcwS5ImyLCc4DgYDRnadAsITWU42VcNtvWc7DUOHM8p2Q4dhPjRDNhz1SNRwodbxxkfoEDrP7nzDhR37RUaXrFZ0ApBgRiKfIhKiYIxBs4RjirJ8rCFGPqsOjW547tUQeTjhX9w8/DUinw5bj2un7J1jsbPB0POH70iDK7juHMBvUPuudgynFhBMEibgaQKLlO08BrVkHCtXl18rP+g95XvVVO71XmRi1dw4hup4YVQ+bayAHYl5LoLgPou/y08wgZhWTmNNCI1t9KxmITQZZbvRv6yomCY7lsgLJxDTISAmXnu6j7wpzhFSUScDTvYYs4MJQBIeAn7iBThDOLt45jgmnmmSl9AgIm3KCsOnaeuSbVj/0BVKqmfYUdqU2jGcDEpiEwWiimBe2gEPvIil0fMnLMMQN0oB5OgpAJF0SykXgZeyfSgRkPzZxxLfh/C3FGIp24eSTyTYh1JpH0rFPlQEzmKny1s29+gDn/zTRlSND9rNfFxaLQvsOKYO8t2Q3C4cgniZH87SMFYzbLkscC3vEymhobqW2bAsMKdsuSywy0to6CIOXoD4vQbxr8PCbpAbMMdTBuNjlidRRn/77/fTIMKIsmJTCQ0DP0/rw02fCWLWUTp++BV+9PB4QhIghk8RbKCHXwFq4OTLQ8f17dXDe56QqmLRvqCq4ty6sKpifKihGhOc0IRGYRARPkesHB4NiZfXweNfS6z4L1Y005Ki6ZjEFETAMrhnv/FNK/FNC3/DJpSoVNqxDP0C4scZyic+9kkiLTYsk3cB360bYf9uyxL3usH9V+fX765f8ktqls2e3eTML4aucBoH0D0c3xdFKtnT1g22KlpsmQ2rcGSULScs6vUNQ3Z0UbRYALW9BmpLRYvl0qlUHOkZZnuJuA0WNmmMH/6LFvfRIfKpl9IY/Yn8NAtDaPY2ltG3M58IfCbwWZf47EDsr8cB0TrOt9MRRutAZz1lUNemwbe9CF/DVF3bUbeqLb99XEAt18MI/9zd4rYR4Qv/KXiMI1nTXavXxeI25HoYi7sb6zqL2G3mVFxQdhHba5jHYl0Xsb2PGysbT9AqHU7E9orY3h2q4O/SEZyhJEnQeZSOYjrhRaEAvKEbyiKE0JsBQ//zgJZlqlLi+8PWudsK732+qK/i0ZG4r2J/DDGe7Qb2Pge8V/HoCMBXsT+GRW8nsNcyHc00DMSgRU93W1r0IwjsrZ4oEdjbSWBvcTgcZmBvR9a4lXAnSU/bhrZVGIplNg1D4ZQth6E4fVOT3aPJnyV0Nt74+C5G392DXpjnb56hsi+8jEoF6oYhHTD/dqDxaJTCZqVJlga6WezJEmMGKkK+F3gLBDMxnIIszncziV4sDatUm3Myykb2DR8OwBMYYcIJspTU+dhKQ3rEa5faYsPCebngxGSS3RWyisfBPU0Un3rTMQ8fUPKierdBdAfggtzSxePfzm6XVmVv6+F1JDBFPbz2RKitag1FKKdsV4QaBs994R5LAQwhQnnj4xOh32EURHg65PVM6f/8c35Xjn775d8ucIK5/9A1DX/75d/Zhfs3jAqDpGL37z+S8STkGS98knARFnwqOmGn/v/8834VfyWhH7DkHTLz5MBwzk6jGVboJIiYkqqUpiGSPHh2KaGhNORPLC0/YXSrGfotvHmmq5nyKB2HG0m1iHxMJnhC4g41QruBgCu3gz+wlbRFworsg7XtfDw70xYOROzXtoQpCS+L9TwocanrN5rTh/8NTbbLJZI6FZeg3jUUl5yyXXFpmqzsuWPu2hX3WVNd5iHEpRCXjOxDhNGQRCSrcc5kX6HfPPzfQeC1GfNUJe0WooOyS8pjkyVmJ7IEYJAfxMRL8YmHdTR+Hze6hGrNSUw3dVNTYg8HMnsxZQ20Ll3WDasjd7HN+O8iFhwmkUuIjZmfRjC47Viu3gjY5JQtAhvN6Jtq39ABeogYIwFs9hrY5FGNzEY+wTH26Y/wM3zy8CtX4g88R1NLfks7lT5V3DqXQFUDaV8KVXE+DoeaNjKqaoYh2Zbb3QtXy7CjZKq1YzisMLWKAe3TOwab2pDULlPR1TLsKBVd7RgO6x17nt+U/7O80WHfODYx5sUA9tZcoS6M2BuYK3S1E3NFkaPi4s3lu6+Q/3fndyRO0JuIry+7oMDhYdnE9+EKWXdMtZkXVk7ZbvkEw2JWa9cUyYCF6rjXquM5S0/BfT8ffsUv5zmAZ+xaGMcxDUMerO1NCaiSD3+J+uVMwdDTNApSXlSB3TYP4LU7bF2zFRCm6rBzLau72Jh6ht2AsPoxnAwI4xOVQQUm2hMZzzdcgGXAD8o/TbEPnSZK7k+gMGSlaKaxESrr3k29CTLbAVr6+3x6uGvL372J4BhKpym6YD4gD//B4FNpQk/8/mertF623TStF6dsN60X8yUApcRxBW4SuGmfcdNFnoKGV3SBnzzsUxCACI537oI3iemYFvlrFogJSILMsz3Mc+BckjDNPPWuMOzjh18HGP3hqxgnQfjHQ8dSref4AoTRFaYq5djS2jWgrs3xVcH/ZDBVRY6vqwxj+DTP70Wz/F5X16/Zv7eafXtJ7gLQV4J7ekt4/tONYxo6TszVEcYqsnK9L51OM/QNK0ABctHL83C9Z3m6BMBaOZ81AKunmk4zgJVRtuys6fZVV9Z6mgBYAmDtM8B6iycZLCoOpyzy7hRcNK0Gp365XX7nsbC0bHDnUSo73uadxzmA3CBGb/EQfwJ5jc4vX393/v3Nu2tk2Orf/iqkyso5rJEqhqM2DAHIKNuUKmpfA7XdlXVXqO1Cquy1VPkO8yKGWShYkVyWooc/p1Hg0azoYQLK+8N/etxzruxFx6XR51HXNwyWg2WigyLvccn6DL8lJA7IrU9uSx8r0AcdxvguwMnSzyS6zfTglN7eU48ktzjPoHw7u4X9nf+yzxrUlrLUbiJLdyAXX0cohZeNT3Ef8Tl+iYp5BR2sPMlCRK6YzhqPAE3VmlUWyylbtGxrbh/+BxFp6o4QkUJE7rOIvCBxSj7lqdW94peSEbufi1BWu2IGItObMkMRbU/6HezFP3dENGHP5w/fjaV6zvVzu2BWjuFkLNVPAVkDNBbRFABWsOQPqETarabZt8V+vCW3i80JHYRFqp7b+d4EiDbfm8q27f5QZEj6I0DJUHq977nztoR7Vkdw73u2tGh5aTM/UAHvVk5frQWkYaxgTtmuBUTX+6Yl26qwgAh4t9fw7vLhzx72MQL5jaYRRsSfFhd8LHaQuSSUXD8Tlh8uCgYh6WdAg2WJy6UXGgAVrz2fuzKwjHJxtOwWWhTvSUCisB88GiEmf0r3ing8YPCF1QB6HS4Kn0Evk5D8SIsRssOSZTZ6i4PJw19/++XfXzK21zhKAzzkjC5GQXjw/qft+EzYak/VbUs39a5Q6HqmrftLrOF/Mig0n6MwGMQ0kWHTj+XxzwrLWBlE0yArWRZPJzyEWJpJZEziYZZ9U1qaRYn3sHnKrD00+e0Ay12U5+8lKiaQqcSLCZTRd1x5TgJA0SEnRdN4gHnpsgidjx/+zJIwAxX0hU8bBDY+iTc6iNutX2Y6LNG4oWnaNufqxpp9M66nVuKq1+gmtKBsv8SVIff0Y0m/LUpcPW6sbDxBq9QCUeJKlLjaoVb3dUzHmYMmSibYIyilKEvjnWYf9NE56GjpiF/uTkYzlr7aJ/ckpBOWCRwFEUpHpHSp/T64J3F+3c0uuD8F4R9fXhLQ5CZMGcuqtiI8/31+UU77iCXaI2l2j/zwa3ZdvhxPWLpJPzq/9zZLLj0Pb1Tx6AhzVLE/jgwxLSjuLIaShVAizYbJ193Vtymi5FK/ZqJEyaXtLQaNSy4tDofDLLm0QI+fLZYVPwpl7cAWsPQd6yJ/O5Y+X/y4YPAqfw/62T9nv/v/DSZhtnENAgA= \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/multiple_downloaded_works.xml b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/multiple_downloaded_works.xml new file mode 100644 index 000000000..b155b16e3 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/multiple_downloaded_works.xml @@ -0,0 +1,57 @@ + + + + 2015-05-23T18:56:52.486Z + 2017-02-28T08:22:12.454Z + + + https://orcid.org/0000-0001-7291-3210 + 0000-0001-7291-3210 + orcid.org + + Paolo Manghi + + + The Query Language TQL + + 5th International Workshop on Web and Data Bases (WebDB02) in conjunction with ACM SIGMOD 2002 + + bibtex + @inproceedings{Conforti2002, Author= {Giovanni Conforti and Giorgio Ghelli and Antonio Albano and Dario Colazzo and Paolo Manghi and Carlo Sartiani}, Bibsource= {DBLP, http://dblp.uni-trier.de}, Booktitle= {5th International Workshop on Web and Data Bases (WebDB02) in conjunction with ACM SIGMOD 2002}, Ee= {http://www.db.ucsd.edu/webdb2002/papers/43.pdf}, Pages= {13-18}, Title= {The Query Language TQL}, Year= {2002}} + + + + conference-paper + + 2002 + + + + + 2015-05-23T18:58:18.492Z + 2017-02-28T08:22:12.455Z + + + https://orcid.org/0000-0001-7291-3210 + 0000-0001-7291-3210 + orcid.org + + Paolo Manghi + + + The Query Language TQL - Demo Presentation + + X Convegno nazionale su Sistemi Evoluti per Basi di Dati (SEBD) + + bibtex + @inproceedings{Conforti2002Demo, Address= {Portoferraio, Italy}, Author= {Giovanni Conforti and Giorgio Ghelli and Antonio Albano and Dario Colazzo and Paolo Manghi and Carlo Sartiani}, Bibsource= {DBLP, http://dblp.uni-trier.de}, Booktitle= {X Convegno nazionale su Sistemi Evoluti per Basi di Dati (SEBD)}, Month= {June}, Pages= {427-431}, Title= {The Query Language TQL - Demo Presentation}, Year= {2002}} + + + + conference-paper + + 2002 + + + + \ No newline at end of file From 64311b8be468c46ad8d03ec206feea5a082ac00c Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Sun, 31 Jul 2022 01:03:29 +0200 Subject: [PATCH 2/4] removed unuseful accumulator --- .../orcid/SparkDownloadOrcidWorks.java | 38 +++++++++---------- .../oozie_app/workflow.xml | 4 +- .../src/test/resources/log4j.properties | 1 + 3 files changed, 22 insertions(+), 21 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java index 72122f8b9..6b9c56005 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java @@ -80,17 +80,10 @@ public class SparkDownloadOrcidWorks { LongAccumulator parsedWorksAcc = spark.sparkContext().longAccumulator("parsed_works"); LongAccumulator modifiedWorksAcc = spark.sparkContext().longAccumulator("modified_works"); LongAccumulator errorCodeFoundAcc = spark.sparkContext().longAccumulator("error_code_found"); - LongAccumulator errorLoadingJsonFoundAcc = spark - .sparkContext() - .longAccumulator("error_loading_json_found"); - LongAccumulator errorLoadingXMLFoundAcc = spark - .sparkContext() - .longAccumulator("error_loading_xml_found"); LongAccumulator errorParsingXMLFoundAcc = spark .sparkContext() .longAccumulator("error_parsing_xml_found"); LongAccumulator downloadedRecordsAcc = spark.sparkContext().longAccumulator("downloaded_records"); - LongAccumulator errorsAcc = spark.sparkContext().longAccumulator("errors"); JavaPairRDD updatedAuthorsRDD = sc .sequenceFile(workingPath + "downloads/updated_authors/*", Text.class, Text.class); @@ -107,11 +100,10 @@ public class SparkDownloadOrcidWorks { if (statusCode.equals("200")) { String compressedData = getJsonValue(jElement, "compressedData"); if (StringUtils.isEmpty(compressedData)) { - errorLoadingJsonFoundAcc.add(1); + } else { String authorSummary = ArgumentApplicationParser.decompressValue(compressedData); if (StringUtils.isEmpty(authorSummary)) { - errorLoadingXMLFoundAcc.add(1); } else { try { workIdLastModifiedDate = XMLRecordParser @@ -184,7 +176,6 @@ public class SparkDownloadOrcidWorks { } else { downloaded.setStatusCode(-4); } - errorsAcc.add(1); } long endReq = System.currentTimeMillis(); long reqTime = endReq - startReq; @@ -193,7 +184,6 @@ public class SparkDownloadOrcidWorks { } if (downloadCompleted) { downloaded.setStatusCode(200); - downloadedRecordsAcc.add(1); downloaded .setCompressedData( ArgumentApplicationParser @@ -214,9 +204,20 @@ public class SparkDownloadOrcidWorks { String works = ArgumentApplicationParser.decompressValue(compressedData); // split a single xml containing multiple works into multiple xml (a single work for each xml) - List splittedWorks = XMLRecordParser - .splitWorks(orcidId, works.getBytes(StandardCharsets.UTF_8)); - + List splittedWorks = null; + try { + splittedWorks = XMLRecordParser + .splitWorks(orcidId, works.getBytes(StandardCharsets.UTF_8)); + } catch (Throwable t) { + final DownloadedRecordData errDownloaded = new DownloadedRecordData(); + errDownloaded.setOrcidId(orcidId); + errDownloaded.setLastModifiedDate(lastModifiedDate); + errDownloaded.setStatusCode(-10); + errDownloaded.setErrorMessage(t.getMessage()); + splittedDownloadedWorks.add(errDownloaded.toTuple2()); + errorParsingXMLFoundAcc.add(1); + return splittedDownloadedWorks.iterator(); + } splittedWorks.forEach(w -> { final DownloadedRecordData downloaded = new DownloadedRecordData(); downloaded.setOrcidId(orcidId); @@ -228,10 +229,12 @@ public class SparkDownloadOrcidWorks { .setCompressedData( ArgumentApplicationParser .compressArgument(w)); - } catch (IOException e) { - throw new RuntimeException(e); + } catch (Throwable t) { + downloaded.setStatusCode(-11); + downloaded.setErrorMessage(t.getMessage()); } splittedDownloadedWorks.add(downloaded.toTuple2()); + downloadedRecordsAcc.add(1); }); return splittedDownloadedWorks.iterator(); @@ -250,11 +253,8 @@ public class SparkDownloadOrcidWorks { logger.info("parsedWorksAcc: {}", parsedWorksAcc.value()); logger.info("modifiedWorksAcc: {}", modifiedWorksAcc.value()); logger.info("errorCodeFoundAcc: {}", errorCodeFoundAcc.value()); - logger.info("errorLoadingJsonFoundAcc: {}", errorLoadingJsonFoundAcc.value()); - logger.info("errorLoadingXMLFoundAcc: {}", errorLoadingXMLFoundAcc.value()); logger.info("errorParsingXMLFoundAcc: {}", errorParsingXMLFoundAcc.value()); logger.info("downloadedRecordsAcc: {}", downloadedRecordsAcc.value()); - logger.info("errorsAcc: {}", errorsAcc.value()); }); } diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml index 43d7eb1ec..f1195a16f 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml @@ -78,7 +78,7 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -190,7 +190,7 @@ -odownloads/updated_works -t${token} - + diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/log4j.properties b/dhp-workflows/dhp-doiboost/src/test/resources/log4j.properties index 20f56e38d..520bf1a18 100644 --- a/dhp-workflows/dhp-doiboost/src/test/resources/log4j.properties +++ b/dhp-workflows/dhp-doiboost/src/test/resources/log4j.properties @@ -7,5 +7,6 @@ log4j.appender.A1=org.apache.log4j.ConsoleAppender # A1 uses PatternLayout. log4j.logger.org = ERROR log4j.logger.eu.dnetlib = DEBUG +log4j.logger.eu.dnetlib.doiboost.orcid = INFO log4j.appender.A1.layout=org.apache.log4j.PatternLayout log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n \ No newline at end of file From 27445ccdaafd88270746d375f26784deb5f67cdd Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Thu, 25 Aug 2022 11:56:14 +0200 Subject: [PATCH 3/4] cleaned log --- .../doiboost/orcid/util/MultiAttemptsHttpConnector.java | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/MultiAttemptsHttpConnector.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/MultiAttemptsHttpConnector.java index 5ef6efa26..fc8f62bca 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/MultiAttemptsHttpConnector.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/MultiAttemptsHttpConnector.java @@ -83,8 +83,6 @@ public class MultiAttemptsHttpConnector { throw new CollectorException(msg); } - log.info("Request attempt {} [{}]", retryNumber, requestUrl); - InputStream input = null; try { @@ -104,9 +102,9 @@ public class MultiAttemptsHttpConnector { urlConn.addRequestProperty(HttpHeaders.AUTHORIZATION, String.format("Bearer %s", getAuthToken())); } - if (log.isDebugEnabled()) { - logHeaderFields(urlConn); - } +// if (log.isDebugEnabled()) { +// logHeaderFields(urlConn); +// } int retryAfter = obtainRetryAfter(urlConn.getHeaderFields()); if (is2xx(urlConn.getResponseCode())) { From da1cf561e6b89da40d4d147b6de4103fd84a475c Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Thu, 25 Aug 2022 11:57:20 +0200 Subject: [PATCH 4/4] alignment with beta --- .../eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala | 3 --- 1 file changed, 3 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala index 30928fdd0..98b5b708e 100644 --- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala @@ -498,9 +498,6 @@ class CrossrefMappingTest { assertNotNull(pub.getJournal.getIssnOnline) assertNotNull(pub.getJournal.getName) - - - } @Test