From ee34cc51c3914c73bc7344424cda747810e691cb Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 1 Apr 2021 17:07:49 +0200 Subject: [PATCH] [ORCID-no-doi] integrating PR#98 https://code-repo.d4science.org/D-Net/dnet-hadoop/pulls/98 --- .../dnetlib/dhp/schema/oaf/Relation.java.rej | 31 + .../dhp/schema/orcid/AuthorHistory.java | 79 ++ .../dhp/schema/orcid/AuthorSummary.java | 25 + .../dhp/schema/orcid}/Contributor.java | 8 +- .../dnetlib/dhp/schema/orcid}/ExternalId.java | 6 +- .../dnetlib/dhp/schema/orcid/OrcidData.java | 34 + .../dhp/schema/orcid}/PublicationDate.java | 6 +- .../eu/dnetlib/dhp/schema/orcid/Summary.java | 79 ++ .../eu/dnetlib/dhp/schema/orcid/Work.java | 16 + .../dnetlib/dhp/schema/orcid/WorkDetail.java | 9 +- .../doiboost/orcid/OrcidDownloader.java | 208 --- .../orcid/SparkDownloadOrcidAuthors.java | 113 +- .../orcid/SparkDownloadOrcidAuthors.java.rej | 30 + .../orcid/SparkDownloadOrcidWorks.java | 251 ++++ .../orcid/SparkGenLastModifiedSeq.java | 15 +- .../orcid/SparkGenerateDoiAuthorList.java | 46 +- .../orcid/SparkUpdateOrcidAuthors.java | 242 ++++ .../orcid/SparkUpdateOrcidDatasets.java | 317 +++++ .../doiboost/orcid/SparkUpdateOrcidWorks.java | 186 +++ .../doiboost/orcid/json/JsonHelper.java | 4 +- .../dnetlib/doiboost/orcid/util/HDFSUtil.java | 67 + .../doiboost/orcid/xml/XMLRecordParser.java | 208 ++- .../orcidnodoi/ActivitiesDumpReader.java | 18 +- .../SparkGenEnrichedOrcidWorks.java | 111 +- .../doiboost/orcidnodoi/json/JsonWriter.java | 4 + .../orcidnodoi/oaf/PublicationToOaf.java | 148 +- .../orcidnodoi/oaf/PublicationToOaf.java.rej | 77 ++ .../orcidnodoi/similarity/AuthorMatcher.java | 6 +- .../orcidnodoi/xml/XMLRecordParserNoDoi.java | 12 +- .../gen_doi_author_list_orcid_parameters.json | 2 + ...ters.json => gen_orcid-no-doi_params.json} | 3 +- .../oozie_app/config-default.xml | 18 - .../oozie_app/workflow.xml | 148 +- .../orcid_update/oozie_app/workflow.xml | 163 +++ .../oozie_app/config-default.xml | 22 - .../oozie_app/workflow.xml | 206 ++- .../orcidnodoi/mappings/typologies.json | 26 +- .../orcidnodoi/oozie_app/workflow.xml | 19 +- .../doiboost/orcid/OrcidClientTest.java | 255 ++-- .../orcid/xml/XMLRecordParserTest.java | 80 +- .../orcidnodoi/xml/OrcidNoDoiTest.java | 14 +- ...0000-0002-6664-7451_work.compressed.base64 | 1 + .../0000-0003-3028-6161.compressed.base64 | 2 +- .../orcid/xml/record_0000-0001-5004-5918.xml | 1202 +++++++++++++++++ .../orcid/xml/record_8888-8888-8888-8880.xml | 2 +- 45 files changed, 3844 insertions(+), 675 deletions(-) create mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java.rej create mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/AuthorHistory.java create mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/AuthorSummary.java rename {dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model => dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid}/Contributor.java (84%) rename {dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model => dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid}/ExternalId.java (82%) create mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/OrcidData.java rename {dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model => dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid}/PublicationDate.java (80%) create mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/Summary.java create mode 100644 
dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/Work.java rename dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/WorkDataNoDoi.java => dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/WorkDetail.java (86%) delete mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDownloader.java create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java.rej create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidAuthors.java create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidDatasets.java create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidWorks.java create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/HDFSUtil.java create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java.rej rename dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/{gen_enriched_orcid_works_parameters.json => gen_orcid-no-doi_params.json} (57%) delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_doi_author_list/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_update/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/0000-0002-6664-7451_work.compressed.base64 create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/record_0000-0001-5004-5918.xml diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java.rej b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java.rej new file mode 100644 index 000000000..7ce658877 --- /dev/null +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java.rej @@ -0,0 +1,31 @@ +diff a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java (rejected hunks) +@@ -1,8 +1,6 @@ + + package eu.dnetlib.dhp.schema.oaf; + +-import eu.dnetlib.dhp.schema.common.ModelSupport; +- + import static com.google.common.base.Preconditions.checkArgument; + + import java.text.ParseException; +@@ -10,6 +8,8 @@ import java.util.*; + import java.util.stream.Collectors; + import java.util.stream.Stream; + ++import eu.dnetlib.dhp.schema.common.ModelSupport; ++ + /** + * Relation models any edge between two nodes in the OpenAIRE graph. 
It has a source id and a target id pointing to + * graph node identifiers and it is further characterised by the semantic of the link through the fields relType, +@@ -137,7 +137,10 @@ public class Relation extends Oaf { + try { + setValidationDate(ModelSupport.oldest(getValidationDate(), r.getValidationDate())); + } catch (ParseException e) { +- throw new IllegalArgumentException(String.format("invalid validation date format in relation [s:%s, t:%s]: %s", getSource(), getTarget(), getValidationDate())); ++ throw new IllegalArgumentException(String ++ .format( ++ "invalid validation date format in relation [s:%s, t:%s]: %s", getSource(), getTarget(), ++ getValidationDate())); + } + + super.mergeFrom(r); diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/AuthorHistory.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/AuthorHistory.java new file mode 100644 index 000000000..554aae82c --- /dev/null +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/AuthorHistory.java @@ -0,0 +1,79 @@ + +package eu.dnetlib.dhp.schema.orcid; + +import java.io.Serializable; + +public class AuthorHistory implements Serializable { + private String creationMethod; + private String completionDate; + private String submissionDate; + private String lastModifiedDate; + private boolean claimed; + private String deactivationDate; + private boolean verifiedEmail; + private boolean verifiedPrimaryEmail; + + public String getCreationMethod() { + return creationMethod; + } + + public void setCreationMethod(String creationMethod) { + this.creationMethod = creationMethod; + } + + public String getCompletionDate() { + return completionDate; + } + + public void setCompletionDate(String completionDate) { + this.completionDate = completionDate; + } + + public String getSubmissionDate() { + return submissionDate; + } + + public void setSubmissionDate(String submissionDate) { + this.submissionDate = submissionDate; + } + + public String getLastModifiedDate() { + return lastModifiedDate; + } + + public void setLastModifiedDate(String lastModifiedDate) { + this.lastModifiedDate = lastModifiedDate; + } + + public boolean isClaimed() { + return claimed; + } + + public void setClaimed(boolean claimed) { + this.claimed = claimed; + } + + public String getDeactivationDate() { + return deactivationDate; + } + + public void setDeactivationDate(String deactivationDate) { + this.deactivationDate = deactivationDate; + } + + public boolean isVerifiedEmail() { + return verifiedEmail; + } + + public void setVerifiedEmail(boolean verifiedEmail) { + this.verifiedEmail = verifiedEmail; + } + + public boolean isVerifiedPrimaryEmail() { + return verifiedPrimaryEmail; + } + + public void setVerifiedPrimaryEmail(boolean verifiedPrimaryEmail) { + this.verifiedPrimaryEmail = verifiedPrimaryEmail; + } +} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/AuthorSummary.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/AuthorSummary.java new file mode 100644 index 000000000..794576628 --- /dev/null +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/AuthorSummary.java @@ -0,0 +1,25 @@ + +package eu.dnetlib.dhp.schema.orcid; + +import java.io.Serializable; + +public class AuthorSummary extends OrcidData implements Serializable { + private AuthorData authorData; + private AuthorHistory authorHistory; + + public AuthorData getAuthorData() { + return authorData; + } + + public void setAuthorData(AuthorData authorData) { + this.authorData = authorData; + } + + public AuthorHistory 
getAuthorHistory() { + return authorHistory; + } + + public void setAuthorHistory(AuthorHistory authorHistory) { + this.authorHistory = authorHistory; + } +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/Contributor.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/Contributor.java similarity index 84% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/Contributor.java rename to dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/Contributor.java index 9222c1cc4..3b543db4b 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/Contributor.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/Contributor.java @@ -1,5 +1,5 @@ -package eu.dnetlib.doiboost.orcidnodoi.model; +package eu.dnetlib.dhp.schema.orcid; import java.io.Serializable; @@ -12,9 +12,9 @@ import eu.dnetlib.dhp.schema.orcid.AuthorData; public class Contributor extends AuthorData implements Serializable { private String sequence; private String role; - private transient boolean simpleMatch = false; - private transient Double score = 0.0; - private transient boolean bestMatch = false; + private transient boolean simpleMatch; + private transient Double score; + private transient boolean bestMatch; public String getSequence() { return sequence; diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/ExternalId.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/ExternalId.java similarity index 82% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/ExternalId.java rename to dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/ExternalId.java index 7fe50ce25..d8f001aa5 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/ExternalId.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/ExternalId.java @@ -1,11 +1,13 @@ -package eu.dnetlib.doiboost.orcidnodoi.model; +package eu.dnetlib.dhp.schema.orcid; + +import java.io.Serializable; /** * This class models the data related to external id, that are retrieved from an orcid publication */ -public class ExternalId { +public class ExternalId implements Serializable { private String type; private String value; private String relationShip; diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/OrcidData.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/OrcidData.java new file mode 100644 index 000000000..606eea6a8 --- /dev/null +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/OrcidData.java @@ -0,0 +1,34 @@ + +package eu.dnetlib.dhp.schema.orcid; + +import java.io.Serializable; + +public class OrcidData implements Serializable { + protected String base64CompressData; + protected String statusCode; + protected String downloadDate; + + public String getBase64CompressData() { + return base64CompressData; + } + + public void setBase64CompressData(String base64CompressData) { + this.base64CompressData = base64CompressData; + } + + public String getStatusCode() { + return statusCode; + } + + public void setStatusCode(String statusCode) { + this.statusCode = statusCode; + } + + public String getDownloadDate() { + return downloadDate; + } + + public void setDownloadDate(String downloadDate) { + this.downloadDate = downloadDate; + } +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/PublicationDate.java 
b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/PublicationDate.java similarity index 80% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/PublicationDate.java rename to dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/PublicationDate.java index 5f794d8eb..01972ce95 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/PublicationDate.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/PublicationDate.java @@ -1,11 +1,13 @@ -package eu.dnetlib.doiboost.orcidnodoi.model; +package eu.dnetlib.dhp.schema.orcid; + +import java.io.Serializable; /** * This class models the data related to a publication date, that are retrieved from an orcid publication */ -public class PublicationDate { +public class PublicationDate implements Serializable { private String year; private String month; private String day; diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/Summary.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/Summary.java new file mode 100644 index 000000000..ffebf5021 --- /dev/null +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/Summary.java @@ -0,0 +1,79 @@ + +package eu.dnetlib.dhp.schema.orcid; + +import java.io.Serializable; + +public class Summary implements Serializable { + private String creationMethod; + private String completionDate; + private String submissionDate; + private String lastModifiedDate; + private boolean claimed; + private String deactivationDate; + private boolean verifiedEmail; + private boolean verifiedPrimaryEmail; + + public String getCreationMethod() { + return creationMethod; + } + + public void setCreationMethod(String creationMethod) { + this.creationMethod = creationMethod; + } + + public String getCompletionDate() { + return completionDate; + } + + public void setCompletionDate(String completionDate) { + this.completionDate = completionDate; + } + + public String getSubmissionDate() { + return submissionDate; + } + + public void setSubmissionDate(String submissionDate) { + this.submissionDate = submissionDate; + } + + public String getLastModifiedDate() { + return lastModifiedDate; + } + + public void setLastModifiedDate(String lastModifiedDate) { + this.lastModifiedDate = lastModifiedDate; + } + + public boolean isClaimed() { + return claimed; + } + + public void setClaimed(boolean claimed) { + this.claimed = claimed; + } + + public String getDeactivationDate() { + return deactivationDate; + } + + public void setDeactivationDate(String deactivationDate) { + this.deactivationDate = deactivationDate; + } + + public boolean isVerifiedEmail() { + return verifiedEmail; + } + + public void setVerifiedEmail(boolean verifiedEmail) { + this.verifiedEmail = verifiedEmail; + } + + public boolean isVerifiedPrimaryEmail() { + return verifiedPrimaryEmail; + } + + public void setVerifiedPrimaryEmail(boolean verifiedPrimaryEmail) { + this.verifiedPrimaryEmail = verifiedPrimaryEmail; + } +} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/Work.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/Work.java new file mode 100644 index 000000000..c557eb5d2 --- /dev/null +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/Work.java @@ -0,0 +1,16 @@ + +package eu.dnetlib.dhp.schema.orcid; + +import java.io.Serializable; + +public class Work extends OrcidData implements Serializable { + WorkDetail workDetail; + + public WorkDetail getWorkDetail() { + return workDetail; + } + + public void 
setWorkDetail(WorkDetail workDetail) { + this.workDetail = workDetail; + } +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/WorkDataNoDoi.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/WorkDetail.java similarity index 86% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/WorkDataNoDoi.java rename to dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/WorkDetail.java index 58f992d12..614d415c1 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/WorkDataNoDoi.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/WorkDetail.java @@ -1,14 +1,19 @@ -package eu.dnetlib.doiboost.orcidnodoi.model; +package eu.dnetlib.dhp.schema.orcid; import java.io.Serializable; import java.util.List; +import eu.dnetlib.dhp.schema.orcid.Contributor; +import eu.dnetlib.dhp.schema.orcid.ExternalId; +import eu.dnetlib.dhp.schema.orcid.OrcidData; +import eu.dnetlib.dhp.schema.orcid.PublicationDate; + /** * This class models the data that are retrieved from orcid publication */ -public class WorkDataNoDoi implements Serializable { +public class WorkDetail implements Serializable { private String oid; private String id; diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDownloader.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDownloader.java deleted file mode 100644 index be727ab9f..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDownloader.java +++ /dev/null @@ -1,208 +0,0 @@ - -package eu.dnetlib.doiboost.orcid; - -import java.io.*; -import java.text.SimpleDateFormat; -import java.util.Arrays; -import java.util.Date; -import java.util.List; - -import org.apache.commons.compress.archivers.tar.TarArchiveEntry; -import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; -import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; -import org.apache.commons.io.IOUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.SequenceFile; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.compress.GzipCodec; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClients; -import org.mortbay.log.Log; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; - -public class OrcidDownloader extends OrcidDSManager { - - static final int REQ_LIMIT = 24; - static final int REQ_MAX_TEST = -1; - static final int RECORD_PARSED_COUNTER_LOG_INTERVAL = 500; - static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss"; - static final String lastUpdate = "2020-09-29 00:00:00"; - private String lambdaFileName; - private String outputPath; - private String token; - - public static void main(String[] args) throws IOException, Exception { - OrcidDownloader orcidDownloader = new OrcidDownloader(); - orcidDownloader.loadArgs(args); - orcidDownloader.parseLambdaFile(); - } - - private String downloadRecord(String orcidId) throws IOException { - try (CloseableHttpClient client = HttpClients.createDefault()) { - HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + orcidId + "/record"); - httpGet.addHeader("Accept", "application/vnd.orcid+xml"); - 
httpGet.addHeader("Authorization", String.format("Bearer %s", token)); - CloseableHttpResponse response = client.execute(httpGet); - if (response.getStatusLine().getStatusCode() != 200) { - Log - .info( - "Downloading " + orcidId + " status code: " + response.getStatusLine().getStatusCode()); - return new String(""); - } -// return IOUtils.toString(response.getEntity().getContent()); - return xmlStreamToString(response.getEntity().getContent()); - } - } - - private String xmlStreamToString(InputStream xmlStream) throws IOException { - BufferedReader br = new BufferedReader(new InputStreamReader(xmlStream)); - String line; - StringBuffer buffer = new StringBuffer(); - while ((line = br.readLine()) != null) { - buffer.append(line); - } - return buffer.toString(); - } - - public void parseLambdaFile() throws Exception { - int parsedRecordsCounter = 0; - int downloadedRecordsCounter = 0; - int savedRecordsCounter = 0; - long startDownload = 0; - Configuration conf = initConfigurationObject(); - FileSystem fs = initFileSystemObject(conf); - String lambdaFileUri = hdfsServerUri.concat(workingPath).concat(lambdaFileName); - Path hdfsreadpath = new Path(lambdaFileUri); - FSDataInputStream lambdaFileStream = fs.open(hdfsreadpath); - Path hdfsoutputPath = new Path( - hdfsServerUri - .concat(workingPath) - .concat(outputPath) - .concat("updated_xml_authors.seq")); - try (TarArchiveInputStream tais = new TarArchiveInputStream( - new GzipCompressorInputStream(lambdaFileStream))) { - TarArchiveEntry entry = null; - StringBuilder sb = new StringBuilder(); - try (SequenceFile.Writer writer = SequenceFile - .createWriter( - conf, - SequenceFile.Writer.file(hdfsoutputPath), - SequenceFile.Writer.keyClass(Text.class), - SequenceFile.Writer.valueClass(Text.class), - SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new GzipCodec()))) { - startDownload = System.currentTimeMillis(); - while ((entry = tais.getNextTarEntry()) != null) { - BufferedReader br = new BufferedReader(new InputStreamReader(tais)); // Read directly from tarInput - String line; - while ((line = br.readLine()) != null) { - String[] values = line.split(","); - List recordInfo = Arrays.asList(values); - int nReqTmp = 0; - long startReqTmp = System.currentTimeMillis(); - // skip headers line - if (parsedRecordsCounter == 0) { - parsedRecordsCounter++; - continue; - } - parsedRecordsCounter++; - String orcidId = recordInfo.get(0); - if (isModified(orcidId, recordInfo.get(3))) { - String record = downloadRecord(orcidId); - downloadedRecordsCounter++; - if (!record.isEmpty()) { -// String compressRecord = ArgumentApplicationParser.compressArgument(record); - final Text key = new Text(recordInfo.get(0)); - final Text value = new Text(record); - writer.append(key, value); - savedRecordsCounter++; - } - } else { - break; - } - long endReq = System.currentTimeMillis(); - nReqTmp++; - if (nReqTmp == REQ_LIMIT) { - long reqSessionDuration = endReq - startReqTmp; - if (reqSessionDuration <= 1000) { - Log - .info( - "\nreqSessionDuration: " - + reqSessionDuration - + " nReqTmp: " - + nReqTmp - + " wait ...."); - Thread.sleep(1000 - reqSessionDuration); - } else { - nReqTmp = 0; - startReqTmp = System.currentTimeMillis(); - } - } - if ((parsedRecordsCounter % RECORD_PARSED_COUNTER_LOG_INTERVAL) == 0) { - Log - .info( - "Current parsed: " - + parsedRecordsCounter - + " downloaded: " - + downloadedRecordsCounter - + " saved: " - + savedRecordsCounter); - if (REQ_MAX_TEST != -1 && parsedRecordsCounter > REQ_MAX_TEST) { - break; - } - } - } 
- long endDownload = System.currentTimeMillis(); - long downloadTime = endDownload - startDownload; - Log.info("Download time: " + ((downloadTime / 1000) / 60) + " minutes"); - } - } - } - Log.info("Download started at: " + new Date(startDownload).toString()); - Log.info("Download ended at: " + new Date(System.currentTimeMillis()).toString()); - Log.info("Parsed Records Counter: " + parsedRecordsCounter); - Log.info("Downloaded Records Counter: " + downloadedRecordsCounter); - Log.info("Saved Records Counter: " + savedRecordsCounter); - } - - private void loadArgs(String[] args) throws IOException, Exception { - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - OrcidDownloader.class - .getResourceAsStream( - "/eu/dnetlib/dhp/doiboost/download_orcid_data.json"))); - parser.parseArgument(args); - - hdfsServerUri = parser.get("hdfsServerUri"); - Log.info("HDFS URI: " + hdfsServerUri); - workingPath = parser.get("workingPath"); - Log.info("Default Path: " + workingPath); - lambdaFileName = parser.get("lambdaFileName"); - Log.info("Lambda File Name: " + lambdaFileName); - outputPath = parser.get("outputPath"); - Log.info("Output Data: " + outputPath); - token = parser.get("token"); - } - - public boolean isModified(String orcidId, String modifiedDate) { - Date modifiedDateDt = null; - Date lastUpdateDt = null; - try { - if (modifiedDate.length() != 19) { - modifiedDate = modifiedDate.substring(0, 19); - } - modifiedDateDt = new SimpleDateFormat(DATE_FORMAT).parse(modifiedDate); - lastUpdateDt = new SimpleDateFormat(DATE_FORMAT).parse(lastUpdate); - } catch (Exception e) { - Log.info("[" + orcidId + "] Parsing date: ", e.getMessage()); - return true; - } - return modifiedDateDt.after(lastUpdateDt); - } -} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java index 1422a0840..8cf070213 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java @@ -8,6 +8,7 @@ import java.util.Date; import java.util.Optional; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.apache.http.client.methods.CloseableHttpResponse; @@ -24,13 +25,13 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.doiboost.orcid.model.DownloadedRecordData; +import eu.dnetlib.doiboost.orcid.util.HDFSUtil; import scala.Tuple2; public class SparkDownloadOrcidAuthors { static Logger logger = LoggerFactory.getLogger(SparkDownloadOrcidAuthors.class); static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss"; - static final String lastUpdate = "2020-09-29 00:00:00"; public static void main(String[] args) throws Exception { @@ -53,18 +54,25 @@ public class SparkDownloadOrcidAuthors { final String token = parser.get("token"); final String lambdaFileName = parser.get("lambdaFileName"); logger.info("lambdaFileName: {}", lambdaFileName); + final String hdfsServerUri = parser.get("hdfsServerUri"); SparkConf conf = new SparkConf(); runWithSparkSession( conf, isSparkSessionManaged, spark -> { + String lastUpdate = HDFSUtil.readFromTextFile(hdfsServerUri, workingPath, "last_update.txt"); + 
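Review note: the hard-coded lastUpdate constant is gone; the job now reads it from last_update.txt on HDFS through the new HDFSUtil helper introduced by this commit (its body is not quoted in these hunks). A minimal sketch of what readFromTextFile could look like, assuming only the (hdfsServerUri, workingPath, fileName) signature visible at the call sites; the committed implementation may differ:

	package eu.dnetlib.doiboost.orcid.util;

	import java.io.BufferedReader;
	import java.io.IOException;
	import java.io.InputStreamReader;
	import java.nio.charset.StandardCharsets;

	import org.apache.hadoop.conf.Configuration;
	import org.apache.hadoop.fs.FileSystem;
	import org.apache.hadoop.fs.Path;

	public class HDFSUtil {

		// Sketch (assumed, not the committed code): read a small text file,
		// e.g. last_update.txt, from HDFS into a String.
		public static String readFromTextFile(String hdfsServerUri, String workingPath, String fileName)
			throws IOException {
			Configuration conf = new Configuration();
			conf.set("fs.defaultFS", hdfsServerUri);
			FileSystem fs = FileSystem.get(conf);
			Path path = new Path(hdfsServerUri.concat(workingPath).concat(fileName));
			try (BufferedReader br = new BufferedReader(
				new InputStreamReader(fs.open(path), StandardCharsets.UTF_8))) {
				StringBuilder sb = new StringBuilder();
				String line;
				while ((line = br.readLine()) != null) {
					sb.append(line);
				}
				return sb.toString();
			}
		}
	}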
logger.info("lastUpdate: {}", lastUpdate); + if (StringUtils.isBlank(lastUpdate)) { + throw new RuntimeException("last update info not found"); + } JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); LongAccumulator parsedRecordsAcc = spark.sparkContext().longAccumulator("parsed_records"); LongAccumulator modifiedRecordsAcc = spark.sparkContext().longAccumulator("to_download_records"); LongAccumulator downloadedRecordsAcc = spark.sparkContext().longAccumulator("downloaded_records"); LongAccumulator errorHTTP403Acc = spark.sparkContext().longAccumulator("error_HTTP_403"); + LongAccumulator errorHTTP404Acc = spark.sparkContext().longAccumulator("error_HTTP_404"); LongAccumulator errorHTTP409Acc = spark.sparkContext().longAccumulator("error_HTTP_409"); LongAccumulator errorHTTP503Acc = spark.sparkContext().longAccumulator("error_HTTP_503"); LongAccumulator errorHTTP525Acc = spark.sparkContext().longAccumulator("error_HTTP_525"); @@ -73,13 +81,14 @@ public class SparkDownloadOrcidAuthors { logger.info("Retrieving data from lamda sequence file"); JavaPairRDD lamdaFileRDD = sc .sequenceFile(workingPath + lambdaFileName, Text.class, Text.class); - logger.info("Data retrieved: " + lamdaFileRDD.count()); + final long lamdaFileRDDCount = lamdaFileRDD.count(); + logger.info("Data retrieved: " + lamdaFileRDDCount); Function, Boolean> isModifiedAfterFilter = data -> { String orcidId = data._1().toString(); String lastModifiedDate = data._2().toString(); parsedRecordsAcc.add(1); - if (isModified(orcidId, lastModifiedDate)) { + if (isModified(orcidId, lastModifiedDate, lastUpdate)) { modifiedRecordsAcc.add(1); return true; } @@ -92,49 +101,42 @@ public class SparkDownloadOrcidAuthors { final DownloadedRecordData downloaded = new DownloadedRecordData(); downloaded.setOrcidId(orcidId); downloaded.setLastModifiedDate(lastModifiedDate); - try (CloseableHttpClient client = HttpClients.createDefault()) { - HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + orcidId + "/record"); - httpGet.addHeader("Accept", "application/vnd.orcid+xml"); - httpGet.addHeader("Authorization", String.format("Bearer %s", token)); - long startReq = System.currentTimeMillis(); - CloseableHttpResponse response = client.execute(httpGet); - long endReq = System.currentTimeMillis(); - long reqTime = endReq - startReq; - if (reqTime < 1000) { - Thread.sleep(1000 - reqTime); + CloseableHttpClient client = HttpClients.createDefault(); + HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + orcidId + "/record"); + httpGet.addHeader("Accept", "application/vnd.orcid+xml"); + httpGet.addHeader("Authorization", String.format("Bearer %s", token)); + long startReq = System.currentTimeMillis(); + CloseableHttpResponse response = client.execute(httpGet); + long endReq = System.currentTimeMillis(); + long reqTime = endReq - startReq; + if (reqTime < 1000) { + Thread.sleep(1000 - reqTime); + } + int statusCode = response.getStatusLine().getStatusCode(); + downloaded.setStatusCode(statusCode); + if (statusCode != 200) { + switch (statusCode) { + case 403: + errorHTTP403Acc.add(1); + case 404: + errorHTTP404Acc.add(1); + case 409: + errorHTTP409Acc.add(1); + case 503: + errorHTTP503Acc.add(1); + case 525: + errorHTTP525Acc.add(1); + default: + errorHTTPGenericAcc.add(1); } - int statusCode = response.getStatusLine().getStatusCode(); - downloaded.setStatusCode(statusCode); - if (statusCode != 200) { - switch (statusCode) { - case 403: - errorHTTP403Acc.add(1); - case 409: - errorHTTP409Acc.add(1); - case 503: 
- errorHTTP503Acc.add(1); - throw new RuntimeException("Orcid request rate limit reached (HTTP 503)"); - case 525: - errorHTTP525Acc.add(1); - default: - errorHTTPGenericAcc.add(1); - logger - .info( - "Downloading " + orcidId + " status code: " - + response.getStatusLine().getStatusCode()); - } - return downloaded.toTuple2(); - } - downloadedRecordsAcc.add(1); - downloaded - .setCompressedData( - ArgumentApplicationParser - .compressArgument(IOUtils.toString(response.getEntity().getContent()))); - } catch (Throwable e) { - logger.info("Downloading " + orcidId, e.getMessage()); - downloaded.setErrorMessage(e.getMessage()); return downloaded.toTuple2(); } + downloadedRecordsAcc.add(1); + downloaded + .setCompressedData( + ArgumentApplicationParser + .compressArgument(IOUtils.toString(response.getEntity().getContent()))); + client.close(); return downloaded.toTuple2(); }; @@ -142,10 +144,12 @@ logger.info("Start execution ..."); JavaPairRDD authorsModifiedRDD = lamdaFileRDD.filter(isModifiedAfterFilter); - logger.info("Authors modified count: " + authorsModifiedRDD.count()); + long authorsModifiedCount = authorsModifiedRDD.count(); + logger.info("Authors modified count: " + authorsModifiedCount); + logger.info("Start downloading ..."); authorsModifiedRDD - .repartition(10) + .repartition(100) .map(downloadRecordFunction) .mapToPair(t -> new Tuple2(new Text(t._1()), new Text(t._2()))) .saveAsNewAPIHadoopFile( @@ -154,10 +158,12 @@ Text.class, SequenceFileOutputFormat.class, sc.hadoopConfiguration()); + logger.info("parsedRecordsAcc: " + parsedRecordsAcc.value().toString()); logger.info("modifiedRecordsAcc: " + modifiedRecordsAcc.value().toString()); logger.info("downloadedRecordsAcc: " + downloadedRecordsAcc.value().toString()); logger.info("errorHTTP403Acc: " + errorHTTP403Acc.value().toString()); + logger.info("errorHTTP404Acc: " + errorHTTP404Acc.value().toString()); logger.info("errorHTTP409Acc: " + errorHTTP409Acc.value().toString()); logger.info("errorHTTP503Acc: " + errorHTTP503Acc.value().toString()); logger.info("errorHTTP525Acc: " + errorHTTP525Acc.value().toString()); @@ -166,18 +172,27 @@ } - private static boolean isModified(String orcidId, String modifiedDate) { + public static boolean isModified(String orcidId, String modifiedDate, String lastUpdate) { Date modifiedDateDt; Date lastUpdateDt; + String lastUpdateRedux = ""; try { + if (modifiedDate.equals("last_modified")) { + return false; + } if (modifiedDate.length() != 19) { modifiedDate = modifiedDate.substring(0, 19); } + if (lastUpdate.length() != 19) { + lastUpdateRedux = lastUpdate.substring(0, 19); + } else { + lastUpdateRedux = lastUpdate; + } modifiedDateDt = new SimpleDateFormat(DATE_FORMAT).parse(modifiedDate); - lastUpdateDt = new SimpleDateFormat(DATE_FORMAT).parse(lastUpdate); + lastUpdateDt = new SimpleDateFormat(DATE_FORMAT).parse(lastUpdateRedux); } catch (Exception e) { - logger.info("[" + orcidId + "] Parsing date: ", e.getMessage()); - return true; + throw new RuntimeException("[" + orcidId + "] modifiedDate <" + modifiedDate + "> lastUpdate <" + lastUpdate + + "> Parsing date: " + e.getMessage()); } return modifiedDateDt.after(lastUpdateDt); }
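Review note on the SparkDownloadOrcidAuthors hunks above: the rewritten status-code switch has no break statements, so any matched case falls through and also increments every accumulator below it (a 403 response bumps the 404, 409, 503, 525 and generic counters too); the same pattern recurs in the new SparkDownloadOrcidWorks further down. Note also that the surrounding try/catch was removed, so download failures now propagate instead of being recorded via setErrorMessage. If the intent is one counter per status code, a minimal corrected sketch:

	// Sketch: count each HTTP status exactly once (assumes the fall-through is unintended).
	switch (statusCode) {
		case 403:
			errorHTTP403Acc.add(1);
			break;
		case 404:
			errorHTTP404Acc.add(1);
			break;
		case 409:
			errorHTTP409Acc.add(1);
			break;
		case 503:
			errorHTTP503Acc.add(1);
			break;
		case 525:
			errorHTTP525Acc.add(1);
			break;
		default:
			errorHTTPGenericAcc.add(1);
	}

diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java.rej b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java.rej new file mode 100644 index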
000000000..fc22d8a7a --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java.rej @@ -0,0 +1,30 @@ +diff a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java (rejected hunks) +@@ -31,7 +32,6 @@ public class SparkDownloadOrcidAuthors { + + static Logger logger = LoggerFactory.getLogger(SparkDownloadOrcidAuthors.class); + static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss"; +- static String lastUpdate; + + public static void main(String[] args) throws Exception { + +@@ -54,14 +54,18 @@ public class SparkDownloadOrcidAuthors { + final String token = parser.get("token"); + final String lambdaFileName = parser.get("lambdaFileName"); + logger.info("lambdaFileName: {}", lambdaFileName); +- +- lastUpdate = HDFSUtil.readFromTextFile(workingPath.concat("last_update.txt")); ++ final String hdfsServerUri = parser.get("hdfsServerUri"); + + SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { ++ String lastUpdate = HDFSUtil.readFromTextFile(hdfsServerUri, workingPath, "last_update.txt"); ++ logger.info("lastUpdate: ", lastUpdate); ++ if (StringUtils.isBlank(lastUpdate)) { ++ throw new RuntimeException("last update info not found"); ++ } + JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + LongAccumulator parsedRecordsAcc = spark.sparkContext().longAccumulator("parsed_records"); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java new file mode 100644 index 000000000..57ca2aa71 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java @@ -0,0 +1,251 @@ + +package eu.dnetlib.doiboost.orcid; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.IOException; +import java.text.SimpleDateFormat; +import java.time.LocalDate; +import java.time.format.DateTimeFormatter; +import java.util.*; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.compress.GzipCodec; +import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.api.java.function.Function; +import org.apache.spark.util.LongAccumulator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.gson.JsonElement; +import com.google.gson.JsonParser; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.doiboost.orcid.model.DownloadedRecordData; +import eu.dnetlib.doiboost.orcid.util.HDFSUtil; +import eu.dnetlib.doiboost.orcid.xml.XMLRecordParser; +import scala.Tuple2; + +public class SparkDownloadOrcidWorks { + + static Logger logger = LoggerFactory.getLogger(SparkDownloadOrcidWorks.class); + public static final String 
LAMBDA_FILE_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss"; + public static final DateTimeFormatter LAMBDA_FILE_DATE_FORMATTER = DateTimeFormatter + .ofPattern(LAMBDA_FILE_DATE_FORMAT); + public static final String ORCID_XML_DATETIME_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"; + public static final DateTimeFormatter ORCID_XML_DATETIMEFORMATTER = DateTimeFormatter + .ofPattern(ORCID_XML_DATETIME_FORMAT); + + public static void main(String[] args) throws IOException, Exception { + + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkDownloadOrcidWorks.class + .getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/download_orcid_data.json"))); + parser.parseArgument(args); + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + logger.info("isSparkSessionManaged: {}", isSparkSessionManaged); + final String workingPath = parser.get("workingPath"); + logger.info("workingPath: ", workingPath); + final String outputPath = parser.get("outputPath"); + final String token = parser.get("token"); + final String hdfsServerUri = parser.get("hdfsServerUri"); + + SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + final String lastUpdateValue = HDFSUtil.readFromTextFile(hdfsServerUri, workingPath, "last_update.txt"); + logger.info("lastUpdateValue: ", lastUpdateValue); + + JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + LongAccumulator updatedAuthorsAcc = spark.sparkContext().longAccumulator("updated_authors"); + LongAccumulator parsedAuthorsAcc = spark.sparkContext().longAccumulator("parsed_authors"); + LongAccumulator parsedWorksAcc = spark.sparkContext().longAccumulator("parsed_works"); + LongAccumulator modifiedWorksAcc = spark.sparkContext().longAccumulator("modified_works"); + LongAccumulator maxModifiedWorksLimitAcc = spark + .sparkContext() + .longAccumulator("max_modified_works_limit"); + LongAccumulator errorCodeFoundAcc = spark.sparkContext().longAccumulator("error_code_found"); + LongAccumulator errorLoadingJsonFoundAcc = spark + .sparkContext() + .longAccumulator("error_loading_json_found"); + LongAccumulator errorLoadingXMLFoundAcc = spark + .sparkContext() + .longAccumulator("error_loading_xml_found"); + LongAccumulator errorParsingXMLFoundAcc = spark + .sparkContext() + .longAccumulator("error_parsing_xml_found"); + LongAccumulator downloadedRecordsAcc = spark.sparkContext().longAccumulator("downloaded_records"); + LongAccumulator errorHTTP403Acc = spark.sparkContext().longAccumulator("error_HTTP_403"); + LongAccumulator errorHTTP404Acc = spark.sparkContext().longAccumulator("error_HTTP_404"); + LongAccumulator errorHTTP409Acc = spark.sparkContext().longAccumulator("error_HTTP_409"); + LongAccumulator errorHTTP503Acc = spark.sparkContext().longAccumulator("error_HTTP_503"); + LongAccumulator errorHTTP525Acc = spark.sparkContext().longAccumulator("error_HTTP_525"); + LongAccumulator errorHTTPGenericAcc = spark.sparkContext().longAccumulator("error_HTTP_Generic"); + + JavaPairRDD updatedAuthorsRDD = sc + .sequenceFile(workingPath + "downloads/updated_authors/*", Text.class, Text.class); + updatedAuthorsAcc.setValue(updatedAuthorsRDD.count()); + + FlatMapFunction, String> retrieveWorkUrlFunction = data -> { + String orcidId = data._1().toString(); + String jsonData = data._2().toString(); + List workIds = new ArrayList<>(); + Map workIdLastModifiedDate = new HashMap<>(); + JsonElement 
jElement = new JsonParser().parse(jsonData); + String statusCode = getJsonValue(jElement, "statusCode"); + if (statusCode.equals("200")) { + String compressedData = getJsonValue(jElement, "compressedData"); + if (StringUtils.isEmpty(compressedData)) { + errorLoadingJsonFoundAcc.add(1); + } else { + String authorSummary = ArgumentApplicationParser.decompressValue(compressedData); + if (StringUtils.isEmpty(authorSummary)) { + errorLoadingXMLFoundAcc.add(1); + } else { + try { + workIdLastModifiedDate = XMLRecordParser + .retrieveWorkIdLastModifiedDate(authorSummary.getBytes()); + } catch (Exception e) { + logger.error("parsing " + orcidId + " [" + jsonData + "]", e); + errorParsingXMLFoundAcc.add(1); + } + } + } + } else { + errorCodeFoundAcc.add(1); + } + parsedAuthorsAcc.add(1); + workIdLastModifiedDate.forEach((k, v) -> { + parsedWorksAcc.add(1); + if (isModified(orcidId, v, lastUpdateValue)) { + modifiedWorksAcc.add(1); + workIds.add(orcidId.concat("/work/").concat(k)); + } + }); + if (workIdLastModifiedDate.size() > 50) { + maxModifiedWorksLimitAcc.add(1); + } + return workIds.iterator(); + }; + + Function> downloadWorkFunction = data -> { + String relativeWorkUrl = data; + String orcidId = relativeWorkUrl.split("/")[0]; + final DownloadedRecordData downloaded = new DownloadedRecordData(); + downloaded.setOrcidId(orcidId); + downloaded.setLastModifiedDate(lastUpdateValue); + CloseableHttpClient client = HttpClients.createDefault(); + HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + relativeWorkUrl); + httpGet.addHeader("Accept", "application/vnd.orcid+xml"); + httpGet.addHeader("Authorization", String.format("Bearer %s", token)); + long startReq = System.currentTimeMillis(); + CloseableHttpResponse response = client.execute(httpGet); + long endReq = System.currentTimeMillis(); + long reqTime = endReq - startReq; + if (reqTime < 1000) { + Thread.sleep(1000 - reqTime); + } + int statusCode = response.getStatusLine().getStatusCode(); + downloaded.setStatusCode(statusCode); + if (statusCode != 200) { + switch (statusCode) { + case 403: + errorHTTP403Acc.add(1); + case 404: + errorHTTP404Acc.add(1); + case 409: + errorHTTP409Acc.add(1); + case 503: + errorHTTP503Acc.add(1); + case 525: + errorHTTP525Acc.add(1); + default: + errorHTTPGenericAcc.add(1); + logger + .info( + "Downloading " + orcidId + " status code: " + + response.getStatusLine().getStatusCode()); + } + return downloaded.toTuple2(); + } + downloadedRecordsAcc.add(1); + downloaded + .setCompressedData( + ArgumentApplicationParser + .compressArgument(IOUtils.toString(response.getEntity().getContent()))); + client.close(); + return downloaded.toTuple2(); + }; + + updatedAuthorsRDD + .flatMap(retrieveWorkUrlFunction) + .repartition(100) + .map(downloadWorkFunction) + .mapToPair(t -> new Tuple2(new Text(t._1()), new Text(t._2()))) + .saveAsTextFile(workingPath.concat(outputPath), GzipCodec.class); + + logger.info("updatedAuthorsAcc: " + updatedAuthorsAcc.value().toString()); + logger.info("parsedAuthorsAcc: " + parsedAuthorsAcc.value().toString()); + logger.info("parsedWorksAcc: " + parsedWorksAcc.value().toString()); + logger.info("modifiedWorksAcc: " + modifiedWorksAcc.value().toString()); + logger.info("maxModifiedWorksLimitAcc: " + maxModifiedWorksLimitAcc.value().toString()); + logger.info("errorCodeFoundAcc: " + errorCodeFoundAcc.value().toString()); + logger.info("errorLoadingJsonFoundAcc: " + errorLoadingJsonFoundAcc.value().toString()); + logger.info("errorLoadingXMLFoundAcc: " + 
errorLoadingXMLFoundAcc.value().toString()); + logger.info("errorParsingXMLFoundAcc: " + errorParsingXMLFoundAcc.value().toString()); + logger.info("downloadedRecordsAcc: " + downloadedRecordsAcc.value().toString()); + logger.info("errorHTTP403Acc: " + errorHTTP403Acc.value().toString()); + logger.info("errorHTTP409Acc: " + errorHTTP409Acc.value().toString()); + logger.info("errorHTTP503Acc: " + errorHTTP503Acc.value().toString()); + logger.info("errorHTTP525Acc: " + errorHTTP525Acc.value().toString()); + logger.info("errorHTTPGenericAcc: " + errorHTTPGenericAcc.value().toString()); + }); + + } + + public static boolean isModified(String orcidId, String modifiedDateValue, String lastUpdateValue) { + LocalDate modifiedDate = null; + LocalDate lastUpdate = null; + try { + modifiedDate = LocalDate.parse(modifiedDateValue, SparkDownloadOrcidWorks.ORCID_XML_DATETIMEFORMATTER); + if (lastUpdateValue.length() != 19) { + lastUpdateValue = lastUpdateValue.substring(0, 19); + } + lastUpdate = LocalDate + .parse(lastUpdateValue, SparkDownloadOrcidWorks.LAMBDA_FILE_DATE_FORMATTER); + } catch (Exception e) { + logger.info("[" + orcidId + "] Parsing date: ", e.getMessage()); + throw new RuntimeException("[" + orcidId + "] Parsing date: " + e.getMessage()); + } + return modifiedDate.isAfter(lastUpdate); + } + + private static String getJsonValue(JsonElement jElement, String property) { + if (jElement.getAsJsonObject().has(property)) { + JsonElement name = null; + name = jElement.getAsJsonObject().get(property); + if (name != null && !name.isJsonNull()) { + return name.getAsString(); + } + } + return new String(""); + } +}
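Review note on SparkDownloadOrcidWorks above: the download pipeline maps each record to a Tuple2 of Text and then calls saveAsTextFile, which writes the pairs via Tuple2.toString() rather than producing a sequence file, whereas the authors job persists the same shape of data with saveAsNewAPIHadoopFile. Also, SLF4J calls such as logger.info("workingPath: ", workingPath) are missing the {} placeholder, so the value is never printed, and getJsonValue could return "" directly instead of new String(""). If sequence-file output was intended here as well, a sketch mirroring the authors job:

	// Sketch: persist (orcidId, work record) pairs as a compressed SequenceFile,
	// mirroring SparkDownloadOrcidAuthors; assumes sequence-file output was intended.
	updatedAuthorsRDD
		.flatMap(retrieveWorkUrlFunction)
		.repartition(100)
		.map(downloadWorkFunction)
		.mapToPair(t -> new Tuple2<>(new Text(t._1()), new Text(t._2())))
		.saveAsNewAPIHadoopFile(
			workingPath.concat(outputPath),
			Text.class,
			Text.class,
			SequenceFileOutputFormat.class,
			sc.hadoopConfiguration());

diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenLastModifiedSeq.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenLastModifiedSeq.java index f710635ab..d146f712a 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenLastModifiedSeq.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenLastModifiedSeq.java @@ -3,9 +3,7 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; +import java.io.*; import java.net.URI; import java.util.Arrays; import java.util.List; @@ -17,6 +15,7 @@ import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.SequenceFile; @@ -26,6 +25,7 @@ import org.apache.spark.SparkConf; import org.mortbay.log.Log; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.doiboost.orcid.util.HDFSUtil; public class SparkGenLastModifiedSeq { private static String hdfsServerUri; @@ -50,6 +50,7 @@ public class SparkGenLastModifiedSeq { outputPath = parser.get("outputPath"); lambdaFileName = parser.get("lambdaFileName"); String lambdaFileUri = hdfsServerUri.concat(workingPath).concat(lambdaFileName); + String lastModifiedDateFromLambdaFileUri = "last_modified_date_from_lambda_file.txt"; SparkConf sparkConf = new SparkConf(); runWithSparkSession( @@ -57,6 +58,7 @@ public class SparkGenLastModifiedSeq { isSparkSessionManaged, spark -> { int rowsNum = 0; + String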
lastModifiedAuthorDate = ""; Path output = new Path( hdfsServerUri .concat(workingPath) @@ -89,10 +91,17 @@ public class SparkGenLastModifiedSeq { final Text value = new Text(recordInfo.get(3)); writer.append(key, value); rowsNum++; + if (rowsNum == 2) { + lastModifiedAuthorDate = value.toString(); + } } + } } } + HDFSUtil + .writeToTextFile( + hdfsServerUri, workingPath, lastModifiedDateFromLambdaFileUri, lastModifiedAuthorDate); Log.info("Saved rows from lamda csv tar file: " + rowsNum); }); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenerateDoiAuthorList.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenerateDoiAuthorList.java index 011c153ec..d831f8509 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenerateDoiAuthorList.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenerateDoiAuthorList.java @@ -4,15 +4,13 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.IOException; -import java.util.Arrays; -import java.util.List; -import java.util.Objects; -import java.util.Optional; +import java.util.*; import java.util.stream.Collectors; import java.util.stream.Stream; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; @@ -25,13 +23,15 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.esotericsoftware.minlog.Log; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.JsonElement; import com.google.gson.JsonParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.orcid.AuthorData; +import eu.dnetlib.dhp.schema.orcid.OrcidDOI; import eu.dnetlib.doiboost.orcid.model.WorkData; +import eu.dnetlib.doiboost.orcid.xml.XMLRecordParser; +import eu.dnetlib.doiboost.orcidnodoi.json.JsonWriter; import scala.Tuple2; public class SparkGenerateDoiAuthorList { @@ -56,6 +56,10 @@ public class SparkGenerateDoiAuthorList { logger.info("workingPath: ", workingPath); final String outputDoiAuthorListPath = parser.get("outputDoiAuthorListPath"); logger.info("outputDoiAuthorListPath: ", outputDoiAuthorListPath); + final String authorsPath = parser.get("authorsPath"); + logger.info("authorsPath: ", authorsPath); + final String xmlWorksPath = parser.get("xmlWorksPath"); + logger.info("xmlWorksPath: ", xmlWorksPath); SparkConf conf = new SparkConf(); runWithSparkSession( @@ -65,17 +69,21 @@ public class SparkGenerateDoiAuthorList { JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaPairRDD summariesRDD = sc - .sequenceFile(workingPath + "../orcid_summaries/output/authors.seq", Text.class, Text.class); + .sequenceFile(workingPath.concat(authorsPath), Text.class, Text.class); Dataset summariesDataset = spark .createDataset( summariesRDD.map(seq -> loadAuthorFromJson(seq._1(), seq._2())).rdd(), Encoders.bean(AuthorData.class)); - JavaPairRDD activitiesRDD = sc - .sequenceFile(workingPath + "/output/*.seq", Text.class, Text.class); + JavaPairRDD xmlWorksRDD = sc + .sequenceFile(workingPath.concat(xmlWorksPath), Text.class, Text.class); + Dataset activitiesDataset = spark .createDataset( - activitiesRDD.map(seq -> loadWorkFromJson(seq._1(), seq._2())).rdd(), + xmlWorksRDD + .map(seq -> 
XMLRecordParser.VTDParseWorkData(seq._2().toString().getBytes())) + .filter(work -> work != null && work.getErrorCode() == null && work.isDoiFound()) + .rdd(), Encoders.bean(WorkData.class)); Function, Tuple2>> toAuthorListFunction = data -> { @@ -135,13 +143,19 @@ public class SparkGenerateDoiAuthorList { } return null; }) - .mapToPair( - s -> { - ObjectMapper mapper = new ObjectMapper(); - return new Tuple2<>(s._1(), mapper.writeValueAsString(s._2())); - }) - .repartition(10) - .saveAsTextFile(workingPath + outputDoiAuthorListPath); + .mapToPair(s -> { + List authorList = s._2(); + Set oidsAlreadySeen = new HashSet<>(); + authorList.removeIf(a -> !oidsAlreadySeen.add(a.getOid())); + return new Tuple2<>(s._1(), authorList); + }) + .map(s -> { + OrcidDOI orcidDOI = new OrcidDOI(); + orcidDOI.setDoi(s._1()); + orcidDOI.setAuthors(s._2()); + return JsonWriter.create(orcidDOI); + }) + .saveAsTextFile(workingPath + outputDoiAuthorListPath, GzipCodec.class); }); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidAuthors.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidAuthors.java new file mode 100644 index 000000000..0eb844fe2 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidAuthors.java @@ -0,0 +1,242 @@ + +package eu.dnetlib.doiboost.orcid; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import static org.apache.spark.sql.functions.*; + +import java.io.IOException; +import java.util.List; +import java.util.Objects; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.compress.GzipCodec; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.Function; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.util.LongAccumulator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.gson.JsonElement; +import com.google.gson.JsonParser; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.orcid.AuthorSummary; +import eu.dnetlib.doiboost.orcid.xml.XMLRecordParser; +import scala.Tuple2; + +public class SparkUpdateOrcidAuthors { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() + .setSerializationInclusion(JsonInclude.Include.NON_NULL); + + public static void main(String[] args) throws IOException, Exception { + Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidAuthors.class); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkUpdateOrcidAuthors.class + .getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/download_orcid_data.json"))); + parser.parseArgument(args); + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + final String workingPath = parser.get("workingPath"); +// final String outputPath = parser.get("outputPath"); + + SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + JavaSparkContext sc = 
JavaSparkContext.fromSparkContext(spark.sparkContext()); + + LongAccumulator oldAuthorsFoundAcc = spark + .sparkContext() + .longAccumulator("old_authors_found"); + LongAccumulator updatedAuthorsFoundAcc = spark + .sparkContext() + .longAccumulator("updated_authors_found"); + LongAccumulator newAuthorsFoundAcc = spark + .sparkContext() + .longAccumulator("new_authors_found"); + LongAccumulator errorCodeAuthorsFoundAcc = spark + .sparkContext() + .longAccumulator("error_code_authors_found"); + LongAccumulator errorLoadingAuthorsJsonFoundAcc = spark + .sparkContext() + .longAccumulator("error_loading_authors_json_found"); + LongAccumulator errorParsingAuthorsXMLFoundAcc = spark + .sparkContext() + .longAccumulator("error_parsing_authors_xml_found"); + + Function, AuthorSummary> retrieveAuthorSummaryFunction = data -> { + AuthorSummary authorSummary = new AuthorSummary(); + String orcidId = data._1().toString(); + String jsonData = data._2().toString(); + JsonElement jElement = new JsonParser().parse(jsonData); + String statusCode = getJsonValue(jElement, "statusCode"); + String downloadDate = getJsonValue(jElement, "lastModifiedDate"); + if (statusCode.equals("200")) { + String compressedData = getJsonValue(jElement, "compressedData"); + if (StringUtils.isEmpty(compressedData)) { + errorLoadingAuthorsJsonFoundAcc.add(1); + } else { + String xmlAuthor = ArgumentApplicationParser.decompressValue(compressedData); + try { + authorSummary = XMLRecordParser + .VTDParseAuthorSummary(xmlAuthor.getBytes()); + authorSummary.setStatusCode(statusCode); + authorSummary.setDownloadDate(Long.toString(System.currentTimeMillis())); + authorSummary.setBase64CompressData(compressedData); + return authorSummary; + } catch (Exception e) { + logger.error("parsing xml " + orcidId + " [" + jsonData + "]", e); + errorParsingAuthorsXMLFoundAcc.add(1); + } + } + } else { + authorSummary.setStatusCode(statusCode); + authorSummary.setDownloadDate(Long.toString(System.currentTimeMillis())); + errorCodeAuthorsFoundAcc.add(1); + } + return authorSummary; + }; + + Dataset downloadedAuthorSummaryDS = spark + .createDataset( + sc + .sequenceFile(workingPath + "downloads/updated_authors/*", Text.class, Text.class) + .map(retrieveAuthorSummaryFunction) + .rdd(), + Encoders.bean(AuthorSummary.class)); + Dataset currentAuthorSummaryDS = spark + .createDataset( + sc + .textFile(workingPath.concat("orcid_dataset/authors/*")) + .map(item -> OBJECT_MAPPER.readValue(item, AuthorSummary.class)) + .rdd(), + Encoders.bean(AuthorSummary.class)); + Dataset mergedAuthorSummaryDS = currentAuthorSummaryDS + .joinWith( + downloadedAuthorSummaryDS, + currentAuthorSummaryDS + .col("authorData.oid") + .equalTo(downloadedAuthorSummaryDS.col("authorData.oid")), + "full_outer") + .map(value -> { + Optional opCurrent = Optional.ofNullable(value._1()); + Optional opDownloaded = Optional.ofNullable(value._2()); + if (!opCurrent.isPresent()) { + newAuthorsFoundAcc.add(1); + return opDownloaded.get(); + } + if (!opDownloaded.isPresent()) { + oldAuthorsFoundAcc.add(1); + return opCurrent.get(); + } + if (opCurrent.isPresent() && opDownloaded.isPresent()) { + updatedAuthorsFoundAcc.add(1); + return opDownloaded.get(); + } + return null; + }, + Encoders.bean(AuthorSummary.class)) + .filter(Objects::nonNull); + + long mergedCount = mergedAuthorSummaryDS.count(); + + Dataset base64DedupedDS = mergedAuthorSummaryDS.dropDuplicates("base64CompressData"); + + List dupOids = base64DedupedDS + .groupBy("authorData.oid") + 
+				List<String> dupOids = base64DedupedDS
+					.groupBy("authorData.oid")
+					.agg(count("authorData.oid").alias("oidOccurrenceCount"))
+					.where("oidOccurrenceCount > 1")
+					.select("oid")
+					.toJavaRDD()
+					.map(row -> row.get(0).toString())
+					.collect();
+
+				JavaRDD<AuthorSummary> dupAuthors = base64DedupedDS
+					.toJavaRDD()
+					.filter(
+						authorSummary -> (Objects.nonNull(authorSummary.getAuthorData())
+							&& Objects.nonNull(authorSummary.getAuthorData().getOid())))
+					.filter(authorSummary -> dupOids.contains(authorSummary.getAuthorData().getOid()));
+
+				Dataset<AuthorSummary> dupAuthorSummaryDS = spark
+					.createDataset(
+						dupAuthors.rdd(),
+						Encoders.bean(AuthorSummary.class));
+				List<Tuple2<String, String>> lastModifiedAuthors = dupAuthorSummaryDS
+					.groupBy("authorData.oid")
+					.agg(array_max(collect_list("downloadDate")))
+					.map(
+						row -> new Tuple2<>(row.get(0).toString(), row.get(1).toString()),
+						Encoders.tuple(Encoders.STRING(), Encoders.STRING()))
+					.toJavaRDD()
+					.collect();
+
+				JavaRDD<AuthorSummary> lastDownloadedAuthors = base64DedupedDS
+					.toJavaRDD()
+					.filter(
+						authorSummary -> (Objects.nonNull(authorSummary.getAuthorData())
+							&& Objects.nonNull(authorSummary.getAuthorData().getOid())))
+					.filter(authorSummary -> {
+						boolean oidFound = lastModifiedAuthors
+							.stream()
+							.filter(a -> a._1().equals(authorSummary.getAuthorData().getOid()))
+							.count() == 1;
+						boolean tsFound = lastModifiedAuthors
+							.stream()
+							.filter(
+								a -> a._1().equals(authorSummary.getAuthorData().getOid()) &&
+									a._2().equals(authorSummary.getDownloadDate()))
+							.count() == 1;
+						return (oidFound && tsFound) || (!oidFound);
+					});
+
+				Dataset<AuthorSummary> cleanedDS = spark
+					.createDataset(
+						lastDownloadedAuthors.rdd(),
+						Encoders.bean(AuthorSummary.class))
+					.dropDuplicates("downloadDate", "authorData");
+				cleanedDS
+					.toJavaRDD()
+					.map(authorSummary -> OBJECT_MAPPER.writeValueAsString(authorSummary))
+					.saveAsTextFile(workingPath.concat("orcid_dataset/new_authors"), GzipCodec.class);
+				long cleanedDSCount = cleanedDS.count();
+
+				logger.info("report_oldAuthorsFoundAcc: " + oldAuthorsFoundAcc.value().toString());
+				logger.info("report_newAuthorsFoundAcc: " + newAuthorsFoundAcc.value().toString());
+				logger.info("report_updatedAuthorsFoundAcc: " + updatedAuthorsFoundAcc.value().toString());
+				logger.info("report_errorCodeFoundAcc: " + errorCodeAuthorsFoundAcc.value().toString());
+				logger.info("report_errorLoadingJsonFoundAcc: " + errorLoadingAuthorsJsonFoundAcc.value().toString());
+				logger.info("report_errorParsingXMLFoundAcc: " + errorParsingAuthorsXMLFoundAcc.value().toString());
+				logger.info("report_merged_count: " + mergedCount);
+				logger.info("report_cleaned_count: " + cleanedDSCount);
+			});
+	}
+
+	private static String getJsonValue(JsonElement jElement, String property) {
+		if (jElement.getAsJsonObject().has(property)) {
+			JsonElement name = null;
+			name = jElement.getAsJsonObject().get(property);
+			if (name != null && !name.isJsonNull()) {
+				return name.getAsString();
+			}
+		}
+		return "";
+	}
+}
diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidDatasets.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidDatasets.java
new file mode 100644
index 000000000..71c011ebc
--- /dev/null
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidDatasets.java
@@ -0,0 +1,317 @@
+
+package eu.dnetlib.doiboost.orcid;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+
+import java.io.IOException;
+import java.util.Objects;
+import java.util.Optional;
+
+import org.apache.commons.io.IOUtils;
+import 
org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.compress.GzipCodec; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.Function; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.util.LongAccumulator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.gson.JsonElement; +import com.google.gson.JsonParser; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.orcid.AuthorSummary; +import eu.dnetlib.dhp.schema.orcid.Work; +import eu.dnetlib.dhp.schema.orcid.WorkDetail; +import eu.dnetlib.doiboost.orcid.xml.XMLRecordParser; +import eu.dnetlib.doiboost.orcidnodoi.xml.XMLRecordParserNoDoi; +import scala.Tuple2; + +public class SparkUpdateOrcidDatasets { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() + .setSerializationInclusion(JsonInclude.Include.NON_NULL); + + public static void main(String[] args) throws IOException, Exception { + Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidDatasets.class); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkUpdateOrcidDatasets.class + .getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/download_orcid_data.json"))); + parser.parseArgument(args); + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + final String workingPath = parser.get("workingPath"); +// final String outputPath = parser.get("outputPath"); + + SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + LongAccumulator oldAuthorsFoundAcc = spark + .sparkContext() + .longAccumulator("old_authors_found"); + LongAccumulator updatedAuthorsFoundAcc = spark + .sparkContext() + .longAccumulator("updated_authors_found"); + LongAccumulator newAuthorsFoundAcc = spark + .sparkContext() + .longAccumulator("new_authors_found"); + LongAccumulator errorCodeAuthorsFoundAcc = spark + .sparkContext() + .longAccumulator("error_code_authors_found"); + LongAccumulator errorLoadingAuthorsJsonFoundAcc = spark + .sparkContext() + .longAccumulator("error_loading_authors_json_found"); + LongAccumulator errorParsingAuthorsXMLFoundAcc = spark + .sparkContext() + .longAccumulator("error_parsing_authors_xml_found"); + + LongAccumulator oldWorksFoundAcc = spark + .sparkContext() + .longAccumulator("old_works_found"); + LongAccumulator updatedWorksFoundAcc = spark + .sparkContext() + .longAccumulator("updated_works_found"); + LongAccumulator newWorksFoundAcc = spark + .sparkContext() + .longAccumulator("new_works_found"); + LongAccumulator errorCodeWorksFoundAcc = spark + .sparkContext() + .longAccumulator("error_code_works_found"); + LongAccumulator errorLoadingWorksJsonFoundAcc = spark + .sparkContext() + .longAccumulator("error_loading_works_json_found"); + LongAccumulator errorParsingWorksXMLFoundAcc = spark + .sparkContext() + .longAccumulator("error_parsing_works_xml_found"); + +// JavaPairRDD xmlSummariesRDD = sc +// .sequenceFile(workingPath.concat("xml/authors/xml_authors.seq"), Text.class, Text.class); +// xmlSummariesRDD +// .map(seq -> { +// 
AuthorSummary authorSummary = XMLRecordParser +// .VTDParseAuthorSummary(seq._2().toString().getBytes()); +// authorSummary +// .setBase64CompressData(ArgumentApplicationParser.compressArgument(seq._2().toString())); +// return authorSummary; +// }) +// .filter(authorSummary -> authorSummary != null) +// .map(authorSummary -> JsonWriter.create(authorSummary)) +// .saveAsTextFile(workingPath.concat("orcid_dataset/authors"), GzipCodec.class); +// +// JavaPairRDD xmlWorksRDD = sc +// .sequenceFile(workingPath.concat("xml/works/*"), Text.class, Text.class); +// +// xmlWorksRDD +// .map(seq -> { +// WorkDetail workDetail = XMLRecordParserNoDoi.VTDParseWorkData(seq._2().toString().getBytes()); +// Work work = new Work(); +// work.setWorkDetail(workDetail); +// work.setBase64CompressData(ArgumentApplicationParser.compressArgument(seq._2().toString())); +// return work; +// }) +// .filter(work -> work != null) +// .map(work -> JsonWriter.create(work)) +// .saveAsTextFile(workingPath.concat("orcid_dataset/works"), GzipCodec.class); + +// Function, AuthorSummary> retrieveAuthorSummaryFunction = data -> { +// AuthorSummary authorSummary = new AuthorSummary(); +// String orcidId = data._1().toString(); +// String jsonData = data._2().toString(); +// JsonElement jElement = new JsonParser().parse(jsonData); +// String statusCode = getJsonValue(jElement, "statusCode"); +// String downloadDate = getJsonValue(jElement, "lastModifiedDate"); +// if (statusCode.equals("200")) { +// String compressedData = getJsonValue(jElement, "compressedData"); +// if (StringUtils.isEmpty(compressedData)) { +// errorLoadingAuthorsJsonFoundAcc.add(1); +// } else { +// String xmlAuthor = ArgumentApplicationParser.decompressValue(compressedData); +// try { +// authorSummary = XMLRecordParser +// .VTDParseAuthorSummary(xmlAuthor.getBytes()); +// authorSummary.setStatusCode(statusCode); +// authorSummary.setDownloadDate("2020-11-18 00:00:05.644768"); +// authorSummary.setBase64CompressData(compressedData); +// return authorSummary; +// } catch (Exception e) { +// logger.error("parsing xml " + orcidId + " [" + jsonData + "]", e); +// errorParsingAuthorsXMLFoundAcc.add(1); +// } +// } +// } else { +// authorSummary.setStatusCode(statusCode); +// authorSummary.setDownloadDate("2020-11-18 00:00:05.644768"); +// errorCodeAuthorsFoundAcc.add(1); +// } +// return authorSummary; +// }; +// +// Dataset downloadedAuthorSummaryDS = spark +// .createDataset( +// sc +// .sequenceFile(workingPath + "downloads/updated_authors/*", Text.class, Text.class) +// .map(retrieveAuthorSummaryFunction) +// .rdd(), +// Encoders.bean(AuthorSummary.class)); +// Dataset currentAuthorSummaryDS = spark +// .createDataset( +// sc +// .textFile(workingPath.concat("orcid_dataset/authors/*")) +// .map(item -> OBJECT_MAPPER.readValue(item, AuthorSummary.class)) +// .rdd(), +// Encoders.bean(AuthorSummary.class)); +// currentAuthorSummaryDS +// .joinWith( +// downloadedAuthorSummaryDS, +// currentAuthorSummaryDS +// .col("authorData.oid") +// .equalTo(downloadedAuthorSummaryDS.col("authorData.oid")), +// "full_outer") +// .map(value -> { +// Optional opCurrent = Optional.ofNullable(value._1()); +// Optional opDownloaded = Optional.ofNullable(value._2()); +// if (!opCurrent.isPresent()) { +// newAuthorsFoundAcc.add(1); +// return opDownloaded.get(); +// } +// if (!opDownloaded.isPresent()) { +// oldAuthorsFoundAcc.add(1); +// return opCurrent.get(); +// } +// if (opCurrent.isPresent() && opDownloaded.isPresent()) { +// updatedAuthorsFoundAcc.add(1); +// return 
opDownloaded.get(); +// } +// return null; +// }, +// Encoders.bean(AuthorSummary.class)) +// .filter(Objects::nonNull) +// .toJavaRDD() +// .map(authorSummary -> OBJECT_MAPPER.writeValueAsString(authorSummary)) +// .saveAsTextFile(workingPath.concat("orcid_dataset/new_authors"), GzipCodec.class); +// +// logger.info("oldAuthorsFoundAcc: " + oldAuthorsFoundAcc.value().toString()); +// logger.info("newAuthorsFoundAcc: " + newAuthorsFoundAcc.value().toString()); +// logger.info("updatedAuthorsFoundAcc: " + updatedAuthorsFoundAcc.value().toString()); +// logger.info("errorCodeFoundAcc: " + errorCodeAuthorsFoundAcc.value().toString()); +// logger.info("errorLoadingJsonFoundAcc: " + errorLoadingAuthorsJsonFoundAcc.value().toString()); +// logger.info("errorParsingXMLFoundAcc: " + errorParsingAuthorsXMLFoundAcc.value().toString()); + + Function retrieveWorkFunction = jsonData -> { + Work work = new Work(); + JsonElement jElement = new JsonParser().parse(jsonData); + String statusCode = getJsonValue(jElement, "statusCode"); + work.setStatusCode(statusCode); + String downloadDate = getJsonValue(jElement, "lastModifiedDate"); + work.setDownloadDate("2020-11-18 00:00:05.644768"); + if (statusCode.equals("200")) { + String compressedData = getJsonValue(jElement, "compressedData"); + if (StringUtils.isEmpty(compressedData)) { + errorLoadingWorksJsonFoundAcc.add(1); + } else { + String xmlWork = ArgumentApplicationParser.decompressValue(compressedData); + try { + WorkDetail workDetail = XMLRecordParserNoDoi + .VTDParseWorkData(xmlWork.getBytes()); + work.setWorkDetail(workDetail); + work.setBase64CompressData(compressedData); + return work; + } catch (Exception e) { + logger.error("parsing xml [" + jsonData + "]", e); + errorParsingWorksXMLFoundAcc.add(1); + } + } + } else { + errorCodeWorksFoundAcc.add(1); + } + return work; + }; + + Dataset downloadedWorksDS = spark + .createDataset( + sc + .textFile(workingPath + "downloads/updated_works/*") + .map(s -> { + return s.substring(21, s.length() - 1); + }) + .map(retrieveWorkFunction) + .rdd(), + Encoders.bean(Work.class)); + Dataset currentWorksDS = spark + .createDataset( + sc + .textFile(workingPath.concat("orcid_dataset/works/*")) + .map(item -> OBJECT_MAPPER.readValue(item, Work.class)) + .rdd(), + Encoders.bean(Work.class)); + currentWorksDS + .joinWith( + downloadedWorksDS, + currentWorksDS + .col("workDetail.id") + .equalTo(downloadedWorksDS.col("workDetail.id")) + .and( + currentWorksDS + .col("workDetail.oid") + .equalTo(downloadedWorksDS.col("workDetail.oid"))), + "full_outer") + .map(value -> { + Optional opCurrent = Optional.ofNullable(value._1()); + Optional opDownloaded = Optional.ofNullable(value._2()); + if (!opCurrent.isPresent()) { + newWorksFoundAcc.add(1); + return opDownloaded.get(); + } + if (!opDownloaded.isPresent()) { + oldWorksFoundAcc.add(1); + return opCurrent.get(); + } + if (opCurrent.isPresent() && opDownloaded.isPresent()) { + updatedWorksFoundAcc.add(1); + return opDownloaded.get(); + } + return null; + }, + Encoders.bean(Work.class)) + .filter(Objects::nonNull) + .toJavaRDD() + .map(work -> OBJECT_MAPPER.writeValueAsString(work)) + .saveAsTextFile(workingPath.concat("orcid_dataset/new_works"), GzipCodec.class); + + logger.info("oldWorksFoundAcc: " + oldWorksFoundAcc.value().toString()); + logger.info("newWorksFoundAcc: " + newWorksFoundAcc.value().toString()); + logger.info("updatedWorksFoundAcc: " + updatedWorksFoundAcc.value().toString()); + logger.info("errorCodeWorksFoundAcc: " + 
errorCodeWorksFoundAcc.value().toString()); + logger.info("errorLoadingJsonWorksFoundAcc: " + errorLoadingWorksJsonFoundAcc.value().toString()); + logger.info("errorParsingXMLWorksFoundAcc: " + errorParsingWorksXMLFoundAcc.value().toString()); + + }); + } + + private static String getJsonValue(JsonElement jElement, String property) { + if (jElement.getAsJsonObject().has(property)) { + JsonElement name = null; + name = jElement.getAsJsonObject().get(property); + if (name != null && !name.isJsonNull()) { + return name.getAsString(); + } + } + return ""; + } +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidWorks.java new file mode 100644 index 000000000..185e5ec46 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidWorks.java @@ -0,0 +1,186 @@ + +package eu.dnetlib.doiboost.orcid; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.IOException; +import java.util.Objects; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.io.compress.GzipCodec; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.Function; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.util.LongAccumulator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.gson.JsonElement; +import com.google.gson.JsonParser; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.orcid.Work; +import eu.dnetlib.dhp.schema.orcid.WorkDetail; +import eu.dnetlib.doiboost.orcid.util.HDFSUtil; +import eu.dnetlib.doiboost.orcidnodoi.xml.XMLRecordParserNoDoi; + +public class SparkUpdateOrcidWorks { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() + .setSerializationInclusion(JsonInclude.Include.NON_NULL); + + public static void main(String[] args) throws IOException, Exception { + Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidWorks.class); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkUpdateOrcidWorks.class + .getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/download_orcid_data.json"))); + parser.parseArgument(args); + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + final String workingPath = parser.get("workingPath"); + final String hdfsServerUri = parser.get("hdfsServerUri"); + + SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + LongAccumulator oldWorksFoundAcc = spark + .sparkContext() + .longAccumulator("old_works_found"); + LongAccumulator updatedWorksFoundAcc = spark + .sparkContext() + .longAccumulator("updated_works_found"); + LongAccumulator newWorksFoundAcc = spark + .sparkContext() + .longAccumulator("new_works_found"); + LongAccumulator errorCodeWorksFoundAcc = spark + .sparkContext() + .longAccumulator("error_code_works_found"); + LongAccumulator errorLoadingWorksJsonFoundAcc = spark + 
.sparkContext()
+					.longAccumulator("error_loading_works_json_found");
+				LongAccumulator errorParsingWorksXMLFoundAcc = spark
+					.sparkContext()
+					.longAccumulator("error_parsing_works_xml_found");
+
+				Function<String, Work> retrieveWorkFunction = jsonData -> {
+					Work work = new Work();
+					JsonElement jElement = new JsonParser().parse(jsonData);
+					String statusCode = getJsonValue(jElement, "statusCode");
+					work.setStatusCode(statusCode);
+					String downloadDate = getJsonValue(jElement, "lastModifiedDate");
+					work.setDownloadDate(Long.toString(System.currentTimeMillis()));
+					if (statusCode.equals("200")) {
+						String compressedData = getJsonValue(jElement, "compressedData");
+						if (StringUtils.isEmpty(compressedData)) {
+							errorLoadingWorksJsonFoundAcc.add(1);
+						} else {
+							String xmlWork = ArgumentApplicationParser.decompressValue(compressedData);
+							try {
+								WorkDetail workDetail = XMLRecordParserNoDoi
+									.VTDParseWorkData(xmlWork.getBytes());
+								work.setWorkDetail(workDetail);
+								work.setBase64CompressData(compressedData);
+								return work;
+							} catch (Exception e) {
+								logger.error("parsing xml [" + jsonData + "]", e);
+								errorParsingWorksXMLFoundAcc.add(1);
+							}
+						}
+					} else {
+						errorCodeWorksFoundAcc.add(1);
+					}
+					return work;
+				};
+
+				Dataset<Work> downloadedWorksDS = spark
+					.createDataset(
+						sc
+							.textFile(workingPath + "downloads/updated_works/*")
+							// drops the tuple-style wrapper "(<orcid-id>," ... ")" around the json payload
+							.map(s -> s.substring(21, s.length() - 1))
+							.map(retrieveWorkFunction)
+							.rdd(),
+						Encoders.bean(Work.class));
+				Dataset<Work> currentWorksDS = spark
+					.createDataset(
+						sc
+							.textFile(workingPath.concat("orcid_dataset/works/*"))
+							.map(item -> OBJECT_MAPPER.readValue(item, Work.class))
+							.rdd(),
+						Encoders.bean(Work.class));
+				currentWorksDS
+					.joinWith(
+						downloadedWorksDS,
+						currentWorksDS
+							.col("workDetail.id")
+							.equalTo(downloadedWorksDS.col("workDetail.id"))
+							.and(
+								currentWorksDS
+									.col("workDetail.oid")
+									.equalTo(downloadedWorksDS.col("workDetail.oid"))),
+						"full_outer")
+					.map(value -> {
+						Optional<Work> opCurrent = Optional.ofNullable(value._1());
+						Optional<Work> opDownloaded = Optional.ofNullable(value._2());
+						if (!opCurrent.isPresent()) {
+							newWorksFoundAcc.add(1);
+							return opDownloaded.get();
+						}
+						if (!opDownloaded.isPresent()) {
+							oldWorksFoundAcc.add(1);
+							return opCurrent.get();
+						}
+						if (opCurrent.isPresent() && opDownloaded.isPresent()) {
+							updatedWorksFoundAcc.add(1);
+							return opDownloaded.get();
+						}
+						return null;
+					},
+						Encoders.bean(Work.class))
+					.filter(Objects::nonNull)
+					.toJavaRDD()
+					.map(work -> OBJECT_MAPPER.writeValueAsString(work))
+					.saveAsTextFile(workingPath.concat("orcid_dataset/new_works"), GzipCodec.class);
+
+				logger.info("oldWorksFoundAcc: " + oldWorksFoundAcc.value().toString());
+				logger.info("newWorksFoundAcc: " + newWorksFoundAcc.value().toString());
+				logger.info("updatedWorksFoundAcc: " + updatedWorksFoundAcc.value().toString());
+				logger.info("errorCodeWorksFoundAcc: " + errorCodeWorksFoundAcc.value().toString());
+				logger.info("errorLoadingJsonWorksFoundAcc: " + errorLoadingWorksJsonFoundAcc.value().toString());
+				logger.info("errorParsingXMLWorksFoundAcc: " + errorParsingWorksXMLFoundAcc.value().toString());
+
+				String lastModifiedDateFromLambdaFile = HDFSUtil
+					.readFromTextFile(hdfsServerUri, workingPath, "last_modified_date_from_lambda_file.txt");
+				HDFSUtil.writeToTextFile(hdfsServerUri, workingPath, "last_update.txt", lastModifiedDateFromLambdaFile);
+				logger.info("last_update file updated");
+			});
+	}
+
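+	// returns the value of the given property as a string, or an empty string
+	// when the property is missing or null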
+	private static String getJsonValue(JsonElement jElement, String property) {
+		if (jElement.getAsJsonObject().has(property)) {
+			JsonElement name = null;
+			name = jElement.getAsJsonObject().get(property);
+			if (name != null && !name.isJsonNull()) {
+				return name.getAsString();
+			}
+		}
+		return "";
+	}
+}
diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java
index 94f7d8c91..a2342f7b4 100644
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java
@@ -3,11 +3,11 @@ package eu.dnetlib.doiboost.orcid.json;
 
 import com.google.gson.Gson;
 
-import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi;
+import eu.dnetlib.dhp.schema.orcid.WorkDetail;
 
 public class JsonHelper {
 
-	public static String createOidWork(WorkDataNoDoi workData) {
+	public static String createOidWork(WorkDetail workData) {
 		return new Gson().toJson(workData);
 	}
 }
diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/HDFSUtil.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/HDFSUtil.java
new file mode 100644
index 000000000..977b55a6f
--- /dev/null
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/HDFSUtil.java
@@ -0,0 +1,67 @@
+
+package eu.dnetlib.doiboost.orcid.util;
+
+import java.io.*;
+import java.net.URI;
+import java.nio.charset.StandardCharsets;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.gson.Gson;
+
+import eu.dnetlib.doiboost.orcid.SparkDownloadOrcidAuthors;
+
+public class HDFSUtil {
+
+	static Logger logger = LoggerFactory.getLogger(HDFSUtil.class);
+
+	private static FileSystem getFileSystem(String hdfsServerUri) throws IOException {
+		Configuration conf = new Configuration();
+		conf.set("fs.defaultFS", hdfsServerUri);
+		FileSystem fileSystem = FileSystem.get(conf);
+		return fileSystem;
+	}
+
+	public static String readFromTextFile(String hdfsServerUri, String workingPath, String path) throws IOException {
+		FileSystem fileSystem = getFileSystem(hdfsServerUri);
+		Path toReadPath = new Path(workingPath.concat(path));
+		if (!fileSystem.exists(toReadPath)) {
+			throw new RuntimeException("File does not exist: " + path);
+		}
+		logger.info("Last_update_path " + toReadPath.toString());
+		FSDataInputStream inputStream = new FSDataInputStream(fileSystem.open(toReadPath));
+		BufferedReader br = new BufferedReader(new InputStreamReader(inputStream));
+		StringBuilder sb = new StringBuilder();
+		try {
+			String line;
+			while ((line = br.readLine()) != null) {
+				sb.append(line);
+			}
+		} finally {
+			br.close();
+		}
+		String buffer = sb.toString();
+		logger.info("Last_update: " + buffer);
+		return buffer;
+	}
+
+	public static void writeToTextFile(String hdfsServerUri, String workingPath, String path, String text)
+		throws IOException {
+		FileSystem fileSystem = getFileSystem(hdfsServerUri);
+		Path toWritePath = new Path(workingPath.concat(path));
+		if (fileSystem.exists(toWritePath)) {
+			fileSystem.delete(toWritePath, true);
+		}
+		FSDataOutputStream os = fileSystem.create(toWritePath);
+		BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, StandardCharsets.UTF_8));
+		
br.write(text); + br.close(); + } +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java index cc9abb621..c98d63b91 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java @@ -1,22 +1,19 @@ package eu.dnetlib.doiboost.orcid.xml; -import java.util.Arrays; -import java.util.List; +import java.io.IOException; +import java.util.*; +import org.apache.commons.lang3.StringUtils; import org.mortbay.log.Log; -import com.ximpleware.AutoPilot; -import com.ximpleware.EOFException; -import com.ximpleware.EncodingException; -import com.ximpleware.EntityException; -import com.ximpleware.ParseException; -import com.ximpleware.VTDGen; -import com.ximpleware.VTDNav; +import com.ximpleware.*; import eu.dnetlib.dhp.parser.utility.VtdException; import eu.dnetlib.dhp.parser.utility.VtdUtilityParser; import eu.dnetlib.dhp.schema.orcid.AuthorData; +import eu.dnetlib.dhp.schema.orcid.AuthorHistory; +import eu.dnetlib.dhp.schema.orcid.AuthorSummary; import eu.dnetlib.doiboost.orcid.model.WorkData; public class XMLRecordParser { @@ -32,9 +29,12 @@ public class XMLRecordParser { private static final String NS_RECORD_URL = "http://www.orcid.org/ns/record"; private static final String NS_RECORD = "record"; private static final String NS_ERROR_URL = "http://www.orcid.org/ns/error"; - + private static final String NS_ACTIVITIES = "activities"; + private static final String NS_ACTIVITIES_URL = "http://www.orcid.org/ns/activities"; private static final String NS_WORK = "work"; private static final String NS_WORK_URL = "http://www.orcid.org/ns/work"; + private static final String NS_HISTORY = "history"; + private static final String NS_HISTORY_URL = "http://www.orcid.org/ns/history"; private static final String NS_ERROR = "error"; @@ -51,6 +51,7 @@ public class XMLRecordParser { ap.declareXPathNameSpace(NS_OTHER, NS_OTHER_URL); ap.declareXPathNameSpace(NS_RECORD, NS_RECORD_URL); ap.declareXPathNameSpace(NS_ERROR, NS_ERROR_URL); + ap.declareXPathNameSpace(NS_HISTORY, NS_HISTORY_URL); AuthorData authorData = new AuthorData(); final List errors = VtdUtilityParser.getTextValue(ap, vn, "//error:response-code"); @@ -89,6 +90,46 @@ public class XMLRecordParser { authorData.setOtherNames(otherNames); } +// final String creationMethod = VtdUtilityParser.getSingleValue(ap, vn, "//history:creation-method"); +// if (StringUtils.isNoneBlank(creationMethod)) { +// authorData.setCreationMethod(creationMethod); +// } +// +// final String completionDate = VtdUtilityParser.getSingleValue(ap, vn, "//history:completion-date"); +// if (StringUtils.isNoneBlank(completionDate)) { +// authorData.setCompletionDate(completionDate); +// } +// +// final String submissionDate = VtdUtilityParser.getSingleValue(ap, vn, "//history:submission-date"); +// if (StringUtils.isNoneBlank(submissionDate)) { +// authorData.setSubmissionDate(submissionDate); +// } +// +// final String claimed = VtdUtilityParser.getSingleValue(ap, vn, "//history:claimed"); +// if (StringUtils.isNoneBlank(claimed)) { +// authorData.setClaimed(Boolean.parseBoolean(claimed)); +// } +// +// final String verifiedEmail = VtdUtilityParser.getSingleValue(ap, vn, "//history:verified-email"); +// if (StringUtils.isNoneBlank(verifiedEmail)) { +// 
authorData.setVerifiedEmail(Boolean.parseBoolean(verifiedEmail)); +// } +// +// final String verifiedPrimaryEmail = VtdUtilityParser.getSingleValue(ap, vn, "//history:verified-primary-email"); +// if (StringUtils.isNoneBlank(verifiedPrimaryEmail)) { +// authorData.setVerifiedPrimaryEmail(Boolean.parseBoolean(verifiedPrimaryEmail)); +// } +// +// final String deactivationDate = VtdUtilityParser.getSingleValue(ap, vn, "//history:deactivation-date"); +// if (StringUtils.isNoneBlank(deactivationDate)) { +// authorData.setDeactivationDate(deactivationDate); +// } +// +// final String lastModifiedDate = VtdUtilityParser +// .getSingleValue(ap, vn, "//history:history/common:last-modified-date"); +// if (StringUtils.isNoneBlank(lastModifiedDate)) { +// authorData.setLastModifiedDate(lastModifiedDate); +// } return authorData; } @@ -139,6 +180,12 @@ public class XMLRecordParser { return retrieveOrcidId(bytes, defaultValue, NS_WORK, NS_WORK_URL, "//work:work", "put-code"); } + public static String retrieveWorkIdFromSummary(byte[] bytes, String defaultValue) + throws VtdException, ParseException { + return retrieveOrcidId( + bytes, defaultValue, NS_ACTIVITIES, NS_ACTIVITIES_URL, "//work:work-summary", "put-code"); + } + private static String retrieveOrcidId(byte[] bytes, String defaultValue, String ns, String nsUrl, String xpath, String idAttributeName) throws VtdException, ParseException { @@ -148,6 +195,7 @@ public class XMLRecordParser { final VTDNav vn = vg.getNav(); final AutoPilot ap = new AutoPilot(vn); ap.declareXPathNameSpace(ns, nsUrl); + ap.declareXPathNameSpace(NS_WORK, NS_WORK_URL); List recordNodes = VtdUtilityParser .getTextValuesWithAttributes( ap, vn, xpath, Arrays.asList(idAttributeName)); @@ -157,4 +205,144 @@ public class XMLRecordParser { Log.info("id not found - default: " + defaultValue); return defaultValue; } + + public static Map retrieveWorkIdLastModifiedDate(byte[] bytes) + throws ParseException, XPathParseException, NavException, XPathEvalException, IOException { + final VTDGen vg = new VTDGen(); + vg.setDoc(bytes); + vg.parse(true); + final VTDNav vn = vg.getNav(); + final AutoPilot ap = new AutoPilot(vn); + ap.declareXPathNameSpace(NS_WORK, NS_WORK_URL); + ap.declareXPathNameSpace(NS_COMMON, NS_COMMON_URL); + Map workIdLastModifiedDate = new HashMap<>(); + ap.selectXPath("//work:work-summary"); + String workId = ""; + while (ap.evalXPath() != -1) { + String lastModifiedDate = ""; + int attr = vn.getAttrVal("put-code"); + if (attr > -1) { + workId = vn.toNormalizedString(attr); + } + if (vn.toElement(VTDNav.FIRST_CHILD, "common:last-modified-date")) { + int val = vn.getText(); + if (val != -1) { + lastModifiedDate = vn.toNormalizedString(val); + workIdLastModifiedDate.put(workId, lastModifiedDate); + } + vn.toElement(VTDNav.PARENT); + } + } + return workIdLastModifiedDate; + } + + public static AuthorSummary VTDParseAuthorSummary(byte[] bytes) + throws VtdException, ParseException { + final VTDGen vg = new VTDGen(); + vg.setDoc(bytes); + vg.parse(true); + final VTDNav vn = vg.getNav(); + final AutoPilot ap = new AutoPilot(vn); + ap.declareXPathNameSpace(NS_COMMON, NS_COMMON_URL); + ap.declareXPathNameSpace(NS_PERSON, NS_PERSON_URL); + ap.declareXPathNameSpace(NS_DETAILS, NS_DETAILS_URL); + ap.declareXPathNameSpace(NS_OTHER, NS_OTHER_URL); + ap.declareXPathNameSpace(NS_RECORD, NS_RECORD_URL); + ap.declareXPathNameSpace(NS_ERROR, NS_ERROR_URL); + ap.declareXPathNameSpace(NS_HISTORY, NS_HISTORY_URL); + + AuthorData authorData = retrieveAuthorData(ap, vn, bytes); + 
AuthorHistory authorHistory = retrieveAuthorHistory(ap, vn, bytes); + AuthorSummary authorSummary = new AuthorSummary(); + authorSummary.setAuthorData(authorData); + authorSummary.setAuthorHistory(authorHistory); + return authorSummary; + } + + private static AuthorData retrieveAuthorData(AutoPilot ap, VTDNav vn, byte[] bytes) + throws VtdException { + AuthorData authorData = new AuthorData(); + final List errors = VtdUtilityParser.getTextValue(ap, vn, "//error:response-code"); + if (!errors.isEmpty()) { + authorData.setErrorCode(errors.get(0)); + return authorData; + } + + List recordNodes = VtdUtilityParser + .getTextValuesWithAttributes( + ap, vn, "//record:record", Arrays.asList("path")); + if (!recordNodes.isEmpty()) { + final String oid = (recordNodes.get(0).getAttributes().get("path")).substring(1); + authorData.setOid(oid); + } else { + return null; + } + + final List names = VtdUtilityParser.getTextValue(ap, vn, "//personal-details:given-names"); + if (!names.isEmpty()) { + authorData.setName(names.get(0)); + } + + final List surnames = VtdUtilityParser.getTextValue(ap, vn, "//personal-details:family-name"); + if (!surnames.isEmpty()) { + authorData.setSurname(surnames.get(0)); + } + + final List creditNames = VtdUtilityParser.getTextValue(ap, vn, "//personal-details:credit-name"); + if (!creditNames.isEmpty()) { + authorData.setCreditName(creditNames.get(0)); + } + + final List otherNames = VtdUtilityParser.getTextValue(ap, vn, "//other-name:content"); + if (!otherNames.isEmpty()) { + authorData.setOtherNames(otherNames); + } + return authorData; + } + + private static AuthorHistory retrieveAuthorHistory(AutoPilot ap, VTDNav vn, byte[] bytes) + throws VtdException { + AuthorHistory authorHistory = new AuthorHistory(); + final String creationMethod = VtdUtilityParser.getSingleValue(ap, vn, "//history:creation-method"); + if (StringUtils.isNoneBlank(creationMethod)) { + authorHistory.setCreationMethod(creationMethod); + } + + final String completionDate = VtdUtilityParser.getSingleValue(ap, vn, "//history:completion-date"); + if (StringUtils.isNoneBlank(completionDate)) { + authorHistory.setCompletionDate(completionDate); + } + + final String submissionDate = VtdUtilityParser.getSingleValue(ap, vn, "//history:submission-date"); + if (StringUtils.isNoneBlank(submissionDate)) { + authorHistory.setSubmissionDate(submissionDate); + } + + final String claimed = VtdUtilityParser.getSingleValue(ap, vn, "//history:claimed"); + if (StringUtils.isNoneBlank(claimed)) { + authorHistory.setClaimed(Boolean.parseBoolean(claimed)); + } + + final String verifiedEmail = VtdUtilityParser.getSingleValue(ap, vn, "//history:verified-email"); + if (StringUtils.isNoneBlank(verifiedEmail)) { + authorHistory.setVerifiedEmail(Boolean.parseBoolean(verifiedEmail)); + } + + final String verifiedPrimaryEmail = VtdUtilityParser.getSingleValue(ap, vn, "//history:verified-primary-email"); + if (StringUtils.isNoneBlank(verifiedPrimaryEmail)) { + authorHistory.setVerifiedPrimaryEmail(Boolean.parseBoolean(verifiedPrimaryEmail)); + } + + final String deactivationDate = VtdUtilityParser.getSingleValue(ap, vn, "//history:deactivation-date"); + if (StringUtils.isNoneBlank(deactivationDate)) { + authorHistory.setDeactivationDate(deactivationDate); + } + + final String lastModifiedDate = VtdUtilityParser + .getSingleValue(ap, vn, "//history:history/common:last-modified-date"); + if (StringUtils.isNoneBlank(lastModifiedDate)) { + authorHistory.setLastModifiedDate(lastModifiedDate); + } + return authorHistory; + } } diff 
--git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java index c2cfafd87..04a3389ed 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java @@ -19,8 +19,8 @@ import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionCodecFactory; import org.mortbay.log.Log; +import eu.dnetlib.dhp.schema.orcid.WorkDetail; import eu.dnetlib.doiboost.orcid.json.JsonHelper; -import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi; import eu.dnetlib.doiboost.orcidnodoi.xml.XMLRecordParserNoDoi; /** @@ -87,29 +87,29 @@ public class ActivitiesDumpReader { while ((line = br.readLine()) != null) { buffer.append(line); } - WorkDataNoDoi workDataNoDoi = XMLRecordParserNoDoi + WorkDetail workDetail = XMLRecordParserNoDoi .VTDParseWorkData(buffer.toString().getBytes()); - if (workDataNoDoi != null) { - if (workDataNoDoi.getErrorCode() != null) { + if (workDetail != null) { + if (workDetail.getErrorCode() != null) { errorFromOrcidFound += 1; Log .debug( "error from Orcid with code " - + workDataNoDoi.getErrorCode() + + workDetail.getErrorCode() + " for entry " + entry.getName()); continue; } - boolean isDoiFound = workDataNoDoi + boolean isDoiFound = workDetail .getExtIds() .stream() .filter(e -> e.getType() != null) .anyMatch(e -> e.getType().equals("doi")); if (!isDoiFound) { - String jsonData = JsonHelper.createOidWork(workDataNoDoi); - Log.debug("oid: " + workDataNoDoi.getOid() + " data: " + jsonData); + String jsonData = JsonHelper.createOidWork(workDetail); + Log.debug("oid: " + workDetail.getOid() + " data: " + jsonData); - final Text key = new Text(workDataNoDoi.getOid()); + final Text key = new Text(workDetail.getOid()); final Text value = new Text(jsonData); try { diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java index a92d534d8..5bcec7224 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java @@ -4,10 +4,12 @@ package eu.dnetlib.doiboost.orcidnodoi; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.IOException; +import java.util.List; import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.apache.spark.SparkConf; @@ -18,6 +20,7 @@ import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.util.LongAccumulator; +import org.mortbay.log.Log; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -30,14 +33,17 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.orcid.AuthorData; +import eu.dnetlib.dhp.schema.orcid.AuthorSummary; +import 
eu.dnetlib.dhp.schema.orcid.Work; +import eu.dnetlib.dhp.schema.orcid.WorkDetail; import eu.dnetlib.doiboost.orcid.json.JsonHelper; -import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi; +import eu.dnetlib.doiboost.orcid.util.HDFSUtil; import eu.dnetlib.doiboost.orcidnodoi.oaf.PublicationToOaf; import eu.dnetlib.doiboost.orcidnodoi.similarity.AuthorMatcher; import scala.Tuple2; /** - * This spark job generates one parquet file, containing orcid publications dataset + * This spark job generates orcid publications no doi dataset */ public class SparkGenEnrichedOrcidWorks { @@ -53,47 +59,65 @@ public class SparkGenEnrichedOrcidWorks { .toString( SparkGenEnrichedOrcidWorks.class .getResourceAsStream( - "/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works_parameters.json"))); + "/eu/dnetlib/dhp/doiboost/gen_orcid-no-doi_params.json"))); parser.parseArgument(args); Boolean isSparkSessionManaged = Optional .ofNullable(parser.get("isSparkSessionManaged")) .map(Boolean::valueOf) .orElse(Boolean.TRUE); + final String hdfsServerUri = parser.get("hdfsServerUri"); final String workingPath = parser.get("workingPath"); final String outputEnrichedWorksPath = parser.get("outputEnrichedWorksPath"); - final String outputWorksPath = parser.get("outputWorksPath"); - final String hdfsServerUri = parser.get("hdfsServerUri"); + final String orcidDataFolder = parser.get("orcidDataFolder"); SparkConf conf = new SparkConf(); runWithSparkSession( conf, isSparkSessionManaged, spark -> { + String lastUpdate = HDFSUtil.readFromTextFile(hdfsServerUri, workingPath, "last_update.txt"); + if (StringUtils.isBlank(lastUpdate)) { + throw new RuntimeException("last update info not found"); + } + final String dateOfCollection = lastUpdate.substring(0, 10); JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - JavaPairRDD summariesRDD = sc - .sequenceFile(workingPath + "authors/authors.seq", Text.class, Text.class); - Dataset summariesDataset = spark + Dataset authorDataset = spark .createDataset( - summariesRDD.map(seq -> loadAuthorFromJson(seq._1(), seq._2())).rdd(), + sc + .textFile(workingPath.concat(orcidDataFolder).concat("/authors/*")) + .map(item -> OBJECT_MAPPER.readValue(item, AuthorSummary.class)) + .filter(authorSummary -> authorSummary.getAuthorData() != null) + .map(authorSummary -> authorSummary.getAuthorData()) + .rdd(), Encoders.bean(AuthorData.class)); - logger.info("Authors data loaded: " + summariesDataset.count()); + logger.info("Authors data loaded: " + authorDataset.count()); - JavaPairRDD activitiesRDD = sc - .sequenceFile(workingPath + outputWorksPath + "*.seq", Text.class, Text.class); - Dataset activitiesDataset = spark + Dataset workDataset = spark .createDataset( - activitiesRDD.map(seq -> loadWorkFromJson(seq._1(), seq._2())).rdd(), - Encoders.bean(WorkDataNoDoi.class)); - logger.info("Works data loaded: " + activitiesDataset.count()); + sc + .textFile(workingPath.concat(orcidDataFolder).concat("/works/*")) + .map(item -> OBJECT_MAPPER.readValue(item, Work.class)) + .filter(work -> work.getWorkDetail() != null) + .map(work -> work.getWorkDetail()) + .filter(work -> work.getErrorCode() == null) + .filter( + work -> work + .getExtIds() + .stream() + .filter(e -> e.getType() != null) + .noneMatch(e -> e.getType().equalsIgnoreCase("doi"))) + .rdd(), + Encoders.bean(WorkDetail.class)); + logger.info("Works data loaded: " + workDataset.count()); - JavaRDD> enrichedWorksRDD = activitiesDataset + JavaRDD> enrichedWorksRDD = workDataset .joinWith( - summariesDataset, - 
activitiesDataset.col("oid").equalTo(summariesDataset.col("oid")), "inner") + authorDataset, + workDataset.col("oid").equalTo(authorDataset.col("oid")), "inner") .map( - (MapFunction, Tuple2>) value -> { - WorkDataNoDoi w = value._1; + (MapFunction, Tuple2>) value -> { + WorkDetail w = value._1; AuthorData a = value._2; AuthorMatcher.match(a, w.getContributors()); return new Tuple2<>(a.getOid(), JsonHelper.createOidWork(w)); @@ -113,13 +137,25 @@ public class SparkGenEnrichedOrcidWorks { .sparkContext() .longAccumulator("errorsNotFoundAuthors"); final LongAccumulator errorsInvalidType = spark.sparkContext().longAccumulator("errorsInvalidType"); + final LongAccumulator otherTypeFound = spark.sparkContext().longAccumulator("otherTypeFound"); + final LongAccumulator deactivatedAcc = spark.sparkContext().longAccumulator("deactivated_found"); + final LongAccumulator titleNotProvidedAcc = spark + .sparkContext() + .longAccumulator("Title_not_provided_found"); + final LongAccumulator noUrlAcc = spark.sparkContext().longAccumulator("no_url_found"); + final PublicationToOaf publicationToOaf = new PublicationToOaf( parsedPublications, enrichedPublications, errorsGeneric, errorsInvalidTitle, errorsNotFoundAuthors, - errorsInvalidType); + errorsInvalidType, + otherTypeFound, + deactivatedAcc, + titleNotProvidedAcc, + noUrlAcc, + dateOfCollection); JavaRDD oafPublicationRDD = enrichedWorksRDD .map( e -> { @@ -148,33 +184,10 @@ public class SparkGenEnrichedOrcidWorks { logger.info("errorsInvalidTitle: " + errorsInvalidTitle.value().toString()); logger.info("errorsNotFoundAuthors: " + errorsNotFoundAuthors.value().toString()); logger.info("errorsInvalidType: " + errorsInvalidType.value().toString()); + logger.info("otherTypeFound: " + otherTypeFound.value().toString()); + logger.info("deactivatedAcc: " + deactivatedAcc.value().toString()); + logger.info("titleNotProvidedAcc: " + titleNotProvidedAcc.value().toString()); + logger.info("noUrlAcc: " + noUrlAcc.value().toString()); }); } - - private static AuthorData loadAuthorFromJson(Text orcidId, Text json) { - AuthorData authorData = new AuthorData(); - authorData.setOid(orcidId.toString()); - JsonElement jElement = new JsonParser().parse(json.toString()); - authorData.setName(getJsonValue(jElement, "name")); - authorData.setSurname(getJsonValue(jElement, "surname")); - authorData.setCreditName(getJsonValue(jElement, "creditname")); - return authorData; - } - - private static WorkDataNoDoi loadWorkFromJson(Text orcidId, Text json) { - - WorkDataNoDoi workData = new Gson().fromJson(json.toString(), WorkDataNoDoi.class); - return workData; - } - - private static String getJsonValue(JsonElement jElement, String property) { - if (jElement.getAsJsonObject().has(property)) { - JsonElement name = null; - name = jElement.getAsJsonObject().get(property); - if (name != null && !name.isJsonNull()) { - return name.getAsString(); - } - } - return new String(""); - } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java index 982fb6316..a89bbc279 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java @@ -22,6 +22,10 @@ public class JsonWriter { return OBJECT_MAPPER.writeValueAsString(authorData); } + public static String create(Object obj) throws JsonProcessingException { + 
return OBJECT_MAPPER.writeValueAsString(obj); + } + public static String create(WorkData workData) { JsonObject work = new JsonObject(); work.addProperty("oid", workData.getOid()); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java index 3b8c74062..050f4d327 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java @@ -18,7 +18,6 @@ import com.google.gson.*; import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.scholexplorer.OafUtils; import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility; import eu.dnetlib.doiboost.orcidnodoi.util.Pair; @@ -26,21 +25,28 @@ import eu.dnetlib.doiboost.orcidnodoi.util.Pair; /** * This class converts an orcid publication from json format to oaf */ + public class PublicationToOaf implements Serializable { static Logger logger = LoggerFactory.getLogger(PublicationToOaf.class); - public static final String ORCID = StringUtils.upperCase(ModelConstants.ORCID); public final static String orcidPREFIX = "orcid_______"; public static final String OPENAIRE_PREFIX = "openaire____"; public static final String SEPARATOR = "::"; + public static final String DEACTIVATED_NAME = "Given Names Deactivated"; + public static final String DEACTIVATED_SURNAME = "Family Name Deactivated"; + private String dateOfCollection = ""; private final LongAccumulator parsedPublications; private final LongAccumulator enrichedPublications; private final LongAccumulator errorsGeneric; private final LongAccumulator errorsInvalidTitle; private final LongAccumulator errorsNotFoundAuthors; private final LongAccumulator errorsInvalidType; + private final LongAccumulator otherTypeFound; + private final LongAccumulator deactivatedAcc; + private final LongAccumulator titleNotProvidedAcc; + private final LongAccumulator noUrlAcc; public PublicationToOaf( LongAccumulator parsedPublications, @@ -48,13 +54,23 @@ public class PublicationToOaf implements Serializable { LongAccumulator errorsGeneric, LongAccumulator errorsInvalidTitle, LongAccumulator errorsNotFoundAuthors, - LongAccumulator errorsInvalidType) { + LongAccumulator errorsInvalidType, + LongAccumulator otherTypeFound, + LongAccumulator deactivatedAcc, + LongAccumulator titleNotProvidedAcc, + LongAccumulator noUrlAcc, + String dateOfCollection) { this.parsedPublications = parsedPublications; this.enrichedPublications = enrichedPublications; this.errorsGeneric = errorsGeneric; this.errorsInvalidTitle = errorsInvalidTitle; this.errorsNotFoundAuthors = errorsNotFoundAuthors; this.errorsInvalidType = errorsInvalidType; + this.otherTypeFound = otherTypeFound; + this.deactivatedAcc = deactivatedAcc; + this.titleNotProvidedAcc = titleNotProvidedAcc; + this.noUrlAcc = noUrlAcc; + this.dateOfCollection = dateOfCollection; } public PublicationToOaf() { @@ -64,12 +80,19 @@ public class PublicationToOaf implements Serializable { this.errorsInvalidTitle = null; this.errorsNotFoundAuthors = null; this.errorsInvalidType = null; + this.otherTypeFound = null; + this.deactivatedAcc = null; + this.titleNotProvidedAcc = null; + this.noUrlAcc = null; + this.dateOfCollection = null; } private static Map> 
datasources = new HashMap>() { { - put(ORCID.toLowerCase(), new Pair<>(ORCID, OPENAIRE_PREFIX + SEPARATOR + ModelConstants.ORCID)); + put( + ModelConstants.ORCID, + new Pair<>(ModelConstants.ORCID.toUpperCase(), OPENAIRE_PREFIX + SEPARATOR + "orcid")); } }; @@ -79,10 +102,10 @@ public class PublicationToOaf implements Serializable { { put("ark".toLowerCase(), new Pair<>("ark", "ark")); - put("arxiv".toLowerCase(), new Pair<>("arxiv", "arXiv")); - put("pmc".toLowerCase(), new Pair<>("pmc", "pmc")); - put("pmid".toLowerCase(), new Pair<>("pmid", "pmid")); - put("source-work-id".toLowerCase(), new Pair<>("orcidworkid", "orcidworkid")); + put("arxiv".toLowerCase(), new Pair<>("arXiv", "arXiv")); + put("pmc".toLowerCase(), new Pair<>("pmc", "PubMed Central ID")); + put("pmid".toLowerCase(), new Pair<>("pmid", "PubMed ID")); + put("source-work-id".toLowerCase(), new Pair<>("orcidworkid", "orcid workid")); put("urn".toLowerCase(), new Pair<>("urn", "urn")); } }; @@ -102,21 +125,15 @@ public class PublicationToOaf implements Serializable { } } + public static final String PID_TYPES = "dnet:pid_types"; + public Oaf generatePublicationActionsFromJson(final String json) { - try { - if (parsedPublications != null) { - parsedPublications.add(1); - } - JsonElement jElement = new JsonParser().parse(json); - JsonObject jObject = jElement.getAsJsonObject(); - return generatePublicationActionsFromDump(jObject); - } catch (Throwable t) { - logger.error("creating publication: " + t.getMessage()); - if (errorsGeneric != null) { - errorsGeneric.add(1); - } - return null; + if (parsedPublications != null) { + parsedPublications.add(1); } + JsonElement jElement = new JsonParser().parse(json); + JsonObject jObject = jElement.getAsJsonObject(); + return generatePublicationActionsFromDump(jObject); } public Oaf generatePublicationActionsFromDump(final JsonObject rootElement) { @@ -142,7 +159,7 @@ public class PublicationToOaf implements Serializable { publication.setLastupdatetimestamp(new Date().getTime()); - publication.setDateofcollection("2020-10-14"); + publication.setDateofcollection(dateOfCollection); publication.setDateoftransformation(DumpToActionsUtility.now_ISO8601()); // Adding external ids @@ -150,8 +167,8 @@ public class PublicationToOaf implements Serializable { .keySet() .stream() .forEach(jsonExtId -> { - final String classid = externalIds.get(jsonExtId.toLowerCase()).getValue(); - final String classname = externalIds.get(jsonExtId.toLowerCase()).getKey(); + final String classid = externalIds.get(jsonExtId.toLowerCase()).getKey(); + final String classname = externalIds.get(jsonExtId.toLowerCase()).getValue(); final String extId = getStringValue(rootElement, jsonExtId); if (StringUtils.isNotBlank(extId)) { publication @@ -182,11 +199,19 @@ public class PublicationToOaf implements Serializable { } return null; } + if (titles.stream().filter(t -> (t != null && t.equals("Title Not Supplied"))).count() > 0) { + if (titleNotProvidedAcc != null) { + titleNotProvidedAcc.add(1); + } + return null; + } publication .setTitle( titles .stream() - .map(t -> mapStructuredProperty(t, ModelConstants.MAIN_TITLE_QUALIFIER, null)) + .map(t -> { + return mapStructuredProperty(t, ModelConstants.MAIN_TITLE_QUALIFIER, null); + }) .filter(s -> s != null) .collect(Collectors.toList())); // Adding identifier @@ -216,8 +241,23 @@ public class PublicationToOaf implements Serializable { mapQualifier( type, type, ModelConstants.DNET_DATA_CITE_RESOURCE, ModelConstants.DNET_DATA_CITE_RESOURCE)); + Map publicationType = 
typologiesMapping.get(type); + if ((publicationType == null || publicationType.isEmpty()) && errorsInvalidType != null) { + errorsInvalidType.add(1); + logger.error("publication_type_not_found: " + type); + return null; + } + final String typeValue = typologiesMapping.get(type).get("value"); cobjValue = typologiesMapping.get(type).get("cobj"); + // this dataset must contain only publication + if (cobjValue.equals("0020")) { + if (otherTypeFound != null) { + otherTypeFound.add(1); + } + return null; + } + final Instance instance = new Instance(); // Adding hostedby @@ -228,9 +268,14 @@ public class PublicationToOaf implements Serializable { if (urls != null && !urls.isEmpty()) { instance.setUrl(urls); } else { - dataInfo.setInvisible(true); + if (noUrlAcc != null) { + noUrlAcc.add(1); + } + return null; } + dataInfo.setInvisible(true); + final String pubDate = getPublicationDate(rootElement, "publicationDates"); if (StringUtils.isNotBlank(pubDate)) { instance.setDateofacceptance(mapStringField(pubDate, null)); @@ -241,11 +286,9 @@ public class PublicationToOaf implements Serializable { // Adding accessright instance .setAccessright( - OafUtils - .createAccessRight( - ModelConstants.UNKNOWN, - ModelConstants.UNKNOWN, - ModelConstants.DNET_ACCESS_MODES, + OafMapperUtils + .accessRight( + ModelConstants.UNKNOWN, "Unknown", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES)); // Adding type @@ -266,12 +309,28 @@ public class PublicationToOaf implements Serializable { // Adding authors final List authors = createAuthors(rootElement); if (authors != null && authors.size() > 0) { - publication.setAuthor(authors); - } else { - if (errorsNotFoundAuthors != null) { - errorsNotFoundAuthors.add(1); + if (authors.stream().filter(a -> { + return ((Objects.nonNull(a.getName()) && a.getName().equals(DEACTIVATED_NAME)) || + (Objects.nonNull(a.getSurname()) && a.getSurname().equals(DEACTIVATED_SURNAME))); + }).count() > 0) { + if (deactivatedAcc != null) { + deactivatedAcc.add(1); + } + return null; + } else { + publication.setAuthor(authors); + } + } else { + if (authors == null) { + Gson gson = new GsonBuilder().setPrettyPrinting().create(); + String json = gson.toJson(rootElement); + throw new RuntimeException("not_valid_authors: " + json); + } else { + if (errorsNotFoundAuthors != null) { + errorsNotFoundAuthors.add(1); + } + return null; } - return null; } String classValue = getDefaultResulttype(cobjValue); publication @@ -518,36 +577,33 @@ public class PublicationToOaf implements Serializable { private KeyValue createCollectedFrom() { KeyValue cf = new KeyValue(); - cf.setValue(ORCID); + cf.setValue(ModelConstants.ORCID.toUpperCase()); cf.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "806360c771262b4d6770e7cdf04b5c5a"); return cf; } private KeyValue createHostedBy() { - KeyValue hb = new KeyValue(); - hb.setValue("Unknown Repository"); - hb.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c"); - return hb; + return ModelConstants.UNKNOWN_REPOSITORY; } private StructuredProperty mapAuthorId(String orcidId) { final StructuredProperty sp = new StructuredProperty(); sp.setValue(orcidId); final Qualifier q = new Qualifier(); - q.setClassid(ORCID.toLowerCase()); - q.setClassname(ORCID.toLowerCase()); + q.setClassid(ModelConstants.ORCID); + q.setClassname(ModelConstants.ORCID_CLASSNAME); q.setSchemeid(ModelConstants.DNET_PID_TYPES); q.setSchemename(ModelConstants.DNET_PID_TYPES); sp.setQualifier(q); final DataInfo dataInfo = new DataInfo(); 
dataInfo.setDeletedbyinference(false); dataInfo.setInferred(false); - dataInfo.setTrust("0.9"); + dataInfo.setTrust("0.91"); dataInfo .setProvenanceaction( mapQualifier( - "sysimport:crosswalk:entityregistry", - "Harvested", + ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY, + ModelConstants.HARVESTED, ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS)); sp.setDataInfo(dataInfo); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java.rej b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java.rej new file mode 100644 index 000000000..76b63a93d --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java.rej @@ -0,0 +1,77 @@ +diff a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java (rejected hunks) +@@ -30,11 +30,11 @@ public class PublicationToOaf implements Serializable { + + static Logger logger = LoggerFactory.getLogger(PublicationToOaf.class); + +- public static final String ORCID = "ORCID"; +- public static final String ORCID_PID_TYPE_CLASSNAME = "Open Researcher and Contributor ID"; + public final static String orcidPREFIX = "orcid_______"; + public static final String OPENAIRE_PREFIX = "openaire____"; + public static final String SEPARATOR = "::"; ++ public static final String DEACTIVATED_NAME = "Given Names Deactivated"; ++ public static final String DEACTIVATED_SURNAME = "Family Name Deactivated"; + + private String dateOfCollection = ""; + private final LongAccumulator parsedPublications; +@@ -72,13 +81,18 @@ public class PublicationToOaf implements Serializable { + this.errorsNotFoundAuthors = null; + this.errorsInvalidType = null; + this.otherTypeFound = null; ++ this.deactivatedAcc = null; ++ this.titleNotProvidedAcc = null; ++ this.noUrlAcc = null; + this.dateOfCollection = null; + } + + private static Map> datasources = new HashMap>() { + + { +- put(ORCID.toLowerCase(), new Pair<>(ORCID, OPENAIRE_PREFIX + SEPARATOR + "orcid")); ++ put( ++ ModelConstants.ORCID, ++ new Pair<>(ModelConstants.ORCID.toUpperCase(), OPENAIRE_PREFIX + SEPARATOR + "orcid")); + + } + }; +@@ -183,6 +197,12 @@ public class PublicationToOaf implements Serializable { + } + return null; + } ++ if (titles.stream().filter(t -> (t != null && t.equals("Title Not Supplied"))).count() > 0) { ++ if (titleNotProvidedAcc != null) { ++ titleNotProvidedAcc.add(1); ++ } ++ return null; ++ } + Qualifier q = mapQualifier("main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title"); + publication + .setTitle( +@@ -527,24 +562,21 @@ public class PublicationToOaf implements Serializable { + + private KeyValue createCollectedFrom() { + KeyValue cf = new KeyValue(); +- cf.setValue(ORCID); ++ cf.setValue(ModelConstants.ORCID.toUpperCase()); + cf.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "806360c771262b4d6770e7cdf04b5c5a"); + return cf; + } + + private KeyValue createHostedBy() { +- KeyValue hb = new KeyValue(); +- hb.setValue("Unknown Repository"); +- hb.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c"); +- return hb; ++ return ModelConstants.UNKNOWN_REPOSITORY; + } + + private StructuredProperty mapAuthorId(String orcidId) { + final StructuredProperty sp = new StructuredProperty(); + sp.setValue(orcidId); + final Qualifier q = new Qualifier(); +- 
q.setClassid(ORCID.toLowerCase()); +- q.setClassname(ORCID_PID_TYPE_CLASSNAME); ++ q.setClassid(ModelConstants.ORCID); ++ q.setClassname(ModelConstants.ORCID_CLASSNAME); + q.setSchemeid(ModelConstants.DNET_PID_TYPES); + q.setSchemename(ModelConstants.DNET_PID_TYPES); + sp.setQualifier(q); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java index c0f617868..e36ed3bbf 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java @@ -19,8 +19,8 @@ import com.ximpleware.XPathParseException; import eu.dnetlib.dhp.parser.utility.VtdException; import eu.dnetlib.dhp.schema.orcid.AuthorData; -import eu.dnetlib.doiboost.orcidnodoi.model.Contributor; -import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi; +import eu.dnetlib.dhp.schema.orcid.Contributor; +import eu.dnetlib.dhp.schema.orcid.WorkDetail; /** * This class is used for searching from a list of publication contributors a @@ -209,7 +209,7 @@ public class AuthorMatcher { } } - private static String toJson(WorkDataNoDoi work) { + private static String toJson(WorkDetail work) { GsonBuilder builder = new GsonBuilder(); Gson gson = builder.create(); return gson.toJson(work); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java index f4b093402..15cd4f268 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java @@ -12,10 +12,10 @@ import com.ximpleware.*; import eu.dnetlib.dhp.parser.utility.VtdException; import eu.dnetlib.dhp.parser.utility.VtdUtilityParser; -import eu.dnetlib.doiboost.orcidnodoi.model.Contributor; -import eu.dnetlib.doiboost.orcidnodoi.model.ExternalId; -import eu.dnetlib.doiboost.orcidnodoi.model.PublicationDate; -import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi; +import eu.dnetlib.dhp.schema.orcid.Contributor; +import eu.dnetlib.dhp.schema.orcid.ExternalId; +import eu.dnetlib.dhp.schema.orcid.PublicationDate; +import eu.dnetlib.dhp.schema.orcid.WorkDetail; /** * This class is used for parsing xml data with vtd parser @@ -42,7 +42,7 @@ public class XMLRecordParserNoDoi { private static final String NS_ERROR = "error"; - public static WorkDataNoDoi VTDParseWorkData(byte[] bytes) + public static WorkDetail VTDParseWorkData(byte[] bytes) throws VtdException, EncodingException, EOFException, EntityException, ParseException, XPathParseException, NavException, XPathEvalException { final VTDGen vg = new VTDGen(); @@ -54,7 +54,7 @@ public class XMLRecordParserNoDoi { ap.declareXPathNameSpace(NS_WORK, NS_WORK_URL); ap.declareXPathNameSpace(NS_ERROR, NS_ERROR_URL); - WorkDataNoDoi workData = new WorkDataNoDoi(); + WorkDetail workData = new WorkDetail(); final List errors = VtdUtilityParser.getTextValue(ap, vn, "//error:response-code"); if (!errors.isEmpty()) { workData.setErrorCode(errors.get(0)); diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_doi_author_list_orcid_parameters.json 
b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_doi_author_list_orcid_parameters.json index b894177b3..41c1a2a7d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_doi_author_list_orcid_parameters.json +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_doi_author_list_orcid_parameters.json @@ -1,3 +1,5 @@ [{"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the working path", "paramRequired": true}, + {"paramName":"a", "paramLongName":"authorsPath", "paramDescription": "the path of the authors seq file", "paramRequired": true}, + {"paramName":"xw", "paramLongName":"xmlWorksPath", "paramDescription": "the path of the works xml seq file", "paramRequired": true}, {"paramName":"o", "paramLongName":"outputDoiAuthorListPath", "paramDescription": "the relative folder of the sequencial file to write the data", "paramRequired": true} ] \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works_parameters.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_orcid-no-doi_params.json similarity index 57% rename from dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works_parameters.json rename to dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_orcid-no-doi_params.json index c3a8f92ec..3456329b1 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works_parameters.json +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_orcid-no-doi_params.json @@ -1,7 +1,6 @@ [ {"paramName":"n", "paramLongName":"hdfsServerUri", "paramDescription": "the server uri", "paramRequired": true}, {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the default work path", "paramRequired": true}, - {"paramName":"f", "paramLongName":"activitiesFileNameTarGz", "paramDescription": "the name of the activities orcid file", "paramRequired": true}, - {"paramName":"ow", "paramLongName":"outputWorksPath", "paramDescription": "the relative folder of the sequencial file to write", "paramRequired": true}, + {"paramName":"i", "paramLongName":"orcidDataFolder", "paramDescription": "the folder of orcid data", "paramRequired": true}, {"paramName":"oew", "paramLongName":"outputEnrichedWorksPath", "paramDescription": "the relative folder of the sequencial file to write the data", "paramRequired": true} ] \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_doi_author_list/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_doi_author_list/oozie_app/config-default.xml deleted file mode 100644 index 3726022cb..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_doi_author_list/oozie_app/config-default.xml +++ /dev/null @@ -1,18 +0,0 @@ - - - jobTracker - hadoop-rm3.garr-pa1.d4science.org:8032 - - - nameNode - hdfs://hadoop-rm1.garr-pa1.d4science.org:8020 - - - queueName - default - - - oozie.action.sharelib.for.spark - spark2 - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_doi_author_list/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_doi_author_list/oozie_app/workflow.xml index 
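
The two parameter descriptors above declare the CLI options each Spark job accepts; the Oozie actions pass matching -w/-a/-xw/-o arguments (visible in the workflow below). A sketch of how such a descriptor is typically consumed; the ArgumentApplicationParser constructor and get() accessor are assumptions inferred from this module's usual pattern, not something this patch shows:

import org.apache.commons.io.IOUtils;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;

public class GenDoiAuthorListArgsSketch {

	public static void main(String[] args) throws Exception {
		// load the option spec shipped with the jar, then match it against the CLI args
		ArgumentApplicationParser parser = new ArgumentApplicationParser(
			IOUtils
				.toString(
					GenDoiAuthorListArgsSketch.class
						.getResourceAsStream("/eu/dnetlib/dhp/doiboost/gen_doi_author_list_orcid_parameters.json")));
		parser.parseArgument(args);
		// long names come straight from the JSON descriptor above
		final String workingPath = parser.get("workingPath");
		final String authorsPath = parser.get("authorsPath");
		final String xmlWorksPath = parser.get("xmlWorksPath");
		final String outputPath = parser.get("outputDoiAuthorListPath");
		System.out.println(workingPath + " " + authorsPath + " " + xmlWorksPath + " " + outputPath);
	}
}
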
21d092a83..133a6f4bd 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_doi_author_list/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_doi_author_list/oozie_app/workflow.xml @@ -1,55 +1,99 @@ - - + + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + spark2MaxExecutors + 20 + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + workingPath + the working dir base path + + + + + ${jobTracker} + ${nameNode} + - workingPath - the working dir base path + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - ${jobTracker} - ${nameNode} - yarn - cluster - Gen_Doi_Author_List - eu.dnetlib.doiboost.orcid.SparkGenerateDoiAuthorList - dhp-doiboost-1.2.1-SNAPSHOT.jar - --num-executors 10 --conf spark.yarn.jars="hdfs://hadoop-rm1.garr-pa1.d4science.org:8020/user/oozie/share/lib/lib_20180405103059/spark2" --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} - - -w${workingPath}/ - -odoi_author_list/ - - - - - - + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + yarn-cluster + cluster + GenDoiAuthorList + eu.dnetlib.doiboost.orcid.SparkGenerateDoiAuthorList + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + -w${workingPath}/ + -aauthors/authors.seq + -xwxml/works/*.seq + -odoi_author_list/ + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_update/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_update/oozie_app/workflow.xml new file mode 100644 index 000000000..135e6a4c8 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_update/oozie_app/workflow.xml @@ -0,0 +1,163 @@ + + + + spark2MaxExecutors + 50 + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra 
listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + workingPath + the working dir base path + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + yarn-cluster + cluster + UpdateOrcidAuthors + eu.dnetlib.doiboost.orcid.SparkUpdateOrcidAuthors + dhp-doiboost-${projectVersion}.jar + + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + + -w${workingPath}/ + -n${nameNode} + -f- + -o- + -t- + + + + + + + + yarn-cluster + cluster + UpdateOrcidWorks + eu.dnetlib.doiboost.orcid.SparkUpdateOrcidWorks + dhp-doiboost-${projectVersion}.jar + + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + + -w${workingPath}/ + -n${nameNode} + -f- + -o- + -t- + + + + + + + + + + + + ${workingPath}/orcid_dataset/new_authors/* + ${workingPath}/orcid_dataset/authors + + + + + + + + + + + + ${workingPath}/orcid_dataset/new_works/* + ${workingPath}/orcid_dataset/works + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/config-default.xml deleted file mode 100644 index 5621415d9..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/config-default.xml +++ /dev/null @@ -1,22 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.action.sharelib.for.java - spark2 - - - oozie.launcher.mapreduce.user.classpath.first - true - - - oozie.launcher.mapreduce.map.java.opts - -Xmx4g - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml index b9383558c..fa161ad35 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml @@ -1,9 +1,25 @@ + + spark2UpdateStepMaxExecutors + 50 + workingPath the working dir base path + + oozie.action.sharelib.for.java + spark2 + + + 
oozie.launcher.mapreduce.user.classpath.first + true + + + oozie.launcher.mapreduce.map.java.opts + -Xmx4g + token access token @@ -30,7 +46,7 @@ number of cores used by single executor - spark2MaxExecutors + spark2DownloadingMaxExecutors 10 @@ -58,6 +74,8 @@ + ${jobTracker} + ${nameNode} oozie.action.sharelib.for.spark @@ -66,18 +84,16 @@ - - - + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - + + - - + @@ -92,22 +108,7 @@ ${shell_cmd} - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcid.OrcidDownloader - -w${workingPath}/ - -n${nameNode} - -flast_modified.csv.tar - -odownloads/ - -t${token} - - + @@ -133,7 +134,16 @@ -olast_modified.seq -t- - + + + + + + + + + + @@ -146,7 +156,7 @@ dhp-doiboost-${projectVersion}.jar --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + --conf spark.dynamicAllocation.maxExecutors=${spark2DownloadingMaxExecutors} --executor-memory=${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} @@ -160,9 +170,151 @@ -odownloads/updated_authors -t${token} + + + + + + + yarn-cluster + cluster + DownloadOrcidWorks + eu.dnetlib.doiboost.orcid.SparkDownloadOrcidWorks + dhp-doiboost-${projectVersion}.jar + + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2DownloadingMaxExecutors} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + + -w${workingPath}/ + -n${nameNode} + -f- + -odownloads/updated_works + -t${token} + + + + + + + + yarn-cluster + cluster + UpdateOrcidAuthors + eu.dnetlib.doiboost.orcid.SparkUpdateOrcidAuthors + dhp-doiboost-${projectVersion}.jar + + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2UpdateStepMaxExecutors} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + + -w${workingPath}/ + -n${nameNode} + -f- + -o- + -t- + + + + + + + + yarn-cluster + cluster + UpdateOrcidWorks + eu.dnetlib.doiboost.orcid.SparkUpdateOrcidWorks + dhp-doiboost-${projectVersion}.jar + + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2UpdateStepMaxExecutors} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + + -w${workingPath}/ + -n${nameNode} + -f- + -o- + -t- + + + + + + + + + + + + ${workingPath}/orcid_dataset/new_authors/* + ${workingPath}/orcid_dataset/authors + + + + + + + + + + + + ${workingPath}/orcid_dataset/new_works/* + ${workingPath}/orcid_dataset/works + + + + + + + + + + + + + + + + + + + + + ${workingPath}/orcid_dataset/authors/* + 
${workingPath}/last_orcid_dataset/authors + + + + + + + + + + + + ${workingPath}/orcid_dataset/works/* + ${workingPath}/last_orcid_dataset/works + - + \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/mappings/typologies.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/mappings/typologies.json index cb696f279..84b4f8418 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/mappings/typologies.json +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/mappings/typologies.json @@ -1,19 +1,9 @@ { - "reference-entry": {"cobj":"0013", "value": "Part of book or chapter of book"}, "report": {"cobj":"0017", "value": "Report"}, - "dataset": {"cobj":"0021", "value": "Dataset"}, "journal-article": {"cobj":"0001", "value": "Article"}, - "reference-book": {"cobj":"0002", "value": "Book"}, "other": {"cobj":"0020", "value": "Other ORP type"}, - "proceedings-article": {"cobj":"0004", "value": "Conference object"}, - "standard": {"cobj":"0038", "value": "Other literature type"}, - "book-part": {"cobj":"0002", "value": "Book"}, - "monograph": {"cobj":"0002", "value": "Book"}, - "report-series": {"cobj":"0017", "value": "Report"}, "book": {"cobj":"0002", "value": "Book"}, "book-chapter": {"cobj":"0013", "value": "Part of book or chapter of book"}, - "peer-review": {"cobj":"0015", "value": "Review"}, - "book-section": {"cobj":"0013", "value": "Part of book or chapter of book"}, "book-review": {"cobj":"0015", "value": "Review"}, "conference-abstract": {"cobj":"0004", "value": "Conference object"}, "conference-paper": {"cobj":"0004", "value": "Conference object"}, @@ -21,7 +11,7 @@ "data-set": {"cobj":"0021", "value": "Dataset"}, "dictionary-entry": {"cobj":"0038", "value": "Other literature type"}, "disclosure": {"cobj":"0038", "value": "Other literature type"}, - "dissertation": {"cobj":"0006", "value": "Doctoral thesis"}, + "dissertation-thesis": {"cobj":"0006", "value": "Doctoral thesis"}, "edited-book": {"cobj":"0002", "value": "Book"}, "encyclopedia-entry": {"cobj":"0038", "value": "Other literature type"}, "lecture-speech": {"cobj":"0010", "value": "Lecture"}, @@ -37,5 +27,17 @@ "supervised-student-publication": {"cobj":"0001", "value": "Article"}, "technical-standard": {"cobj":"0038", "value": "Other literature type"}, "website": {"cobj":"0020", "value": "Other ORP type"}, - "working-paper": {"cobj":"0014", "value": "Research"} + "working-paper": {"cobj":"0014", "value": "Research"}, + "annotation": {"cobj":"0018", "value": "Annotation"}, + "physical-object": {"cobj":"0028", "value": "PhysicalObject"}, + "preprint": {"cobj":"0016", "value": "Preprint"}, + "software": {"cobj":"0029", "value": "Software"}, + "journal-issue": {"cobj":"0001", "value": "Article"}, + "translation": {"cobj":"0038", "value": "Other literature type"}, + "artistic-performance": {"cobj":"0020", "value": "Other ORP type"}, + "online-resource": {"cobj":"0020", "value": "Other ORP type"}, + "registered-copyright": {"cobj":"0020", "value": "Other ORP type"}, + "trademark": {"cobj":"0020", "value": "Other ORP type"}, + "invention": {"cobj":"0020", "value": "Other ORP type"}, + "spin-off-company": {"cobj":"0020", "value": "Other ORP type"} } \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml 
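
The trimmed typologies.json above is the lookup table behind the type handling in PublicationToOaf: each ORCID work type resolves to a dnet cobj code plus a display value, and cobj "0020" ("Other ORP type") rows are filtered out of the publication dataset. A self-contained sketch of that lookup over a hard-coded subset of the table; the pair() helper is local to the sketch:

import java.util.HashMap;
import java.util.Map;

public class TypologiesLookupSketch {

	private static final Map<String, Map<String, String>> typologiesMapping = new HashMap<>();

	static {
		// subset of the mapping shipped in typologies.json
		typologiesMapping.put("journal-article", pair("0001", "Article"));
		typologiesMapping.put("preprint", pair("0016", "Preprint"));
		typologiesMapping.put("website", pair("0020", "Other ORP type"));
	}

	private static Map<String, String> pair(String cobj, String value) {
		Map<String, String> m = new HashMap<>();
		m.put("cobj", cobj);
		m.put("value", value);
		return m;
	}

	public static void main(String[] args) {
		String type = "preprint";
		Map<String, String> mapped = typologiesMapping.get(type);
		if (mapped == null || mapped.isEmpty()) {
			System.out.println("publication_type_not_found: " + type);
		} else if ("0020".equals(mapped.get("cobj"))) {
			System.out.println("skipped: cobj 0020 is not a publication");
		} else {
			System.out.println(type + " -> " + mapped.get("cobj") + " / " + mapped.get("value"));
		}
	}
}
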
b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml index 6cec48a6d..6513ff7e1 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml @@ -1,17 +1,18 @@ + + spark2GenNoDoiDatasetMaxExecutors + 40 + sparkDriverMemory memory for driver process - sparkExecutorMemory + spark2GenNoDoiDatasetExecutorMemory + 2G memory for individual executor - - sparkExecutorCores - number of cores used by single executor - oozieActionShareLibForSpark2 oozie action sharelib for spark 2.* @@ -73,8 +74,9 @@ eu.dnetlib.doiboost.orcidnodoi.SparkGenEnrichedOrcidWorks dhp-doiboost-${projectVersion}.jar - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2GenNoDoiDatasetMaxExecutors} + --executor-memory=${spark2GenNoDoiDatasetExecutorMemory} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} @@ -83,8 +85,7 @@ -w${workingPath}/ -n${nameNode} - -f- - -owno_doi_works/ + -ilast_orcid_dataset -oewno_doi_dataset diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java index 66a7badb7..d96955c4a 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java @@ -10,30 +10,28 @@ import java.nio.file.Paths; import java.nio.file.StandardOpenOption; import java.text.ParseException; import java.text.SimpleDateFormat; -import java.time.Duration; -import java.time.LocalDateTime; -import java.time.temporal.TemporalUnit; -import java.util.Arrays; -import java.util.Date; -import java.util.List; +import java.util.*; import org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; +import org.apache.commons.compress.utils.Lists; +import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; -import org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; -import org.mortbay.log.Log; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.orcid.AuthorData; +import eu.dnetlib.doiboost.orcid.xml.XMLRecordParserTest; import jdk.nashorn.internal.ir.annotations.Ignore; public class OrcidClientTest { - final String orcidId = "0000-0001-7291-3210"; final int REQ_LIMIT = 24; final int REQ_MAX_TEST = 100; final int RECORD_DOWNLOADED_COUNTER_LOG_INTERVAL = 10; @@ -42,69 +40,45 @@ public class OrcidClientTest { String toNotRetrieveDate = "2019-09-29 23:59:59.000000"; String lastUpdate = "2019-09-30 00:00:00"; String shortDate = "2020-05-06 16:06:11"; + final String REQUEST_TYPE_RECORD = "record"; + 
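
Every download in OrcidClientTest goes through the same HTTP pattern, documented by the curl comment just below: a GET on https://api.orcid.org/v3.0/{orcid}/ followed by one of the request types defined here ("record", "works", "work/<put-code>"), with the ORCID XML Accept header and a Bearer token. A stand-alone sketch of that call; the token parameter is a placeholder and the status handling is reduced to a single check:

import org.apache.commons.io.IOUtils;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;

public class OrcidDownloadSketch {

	public static String download(String orcidId, String dataType, String token) throws Exception {
		try (CloseableHttpClient client = HttpClients.createDefault()) {
			// dataType is one of: "record", "works", "work/<put-code>"
			HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + orcidId + "/" + dataType);
			httpGet.addHeader("Accept", "application/vnd.orcid+xml");
			httpGet.addHeader("Authorization", "Bearer " + token);
			try (CloseableHttpResponse response = client.execute(httpGet)) {
				int status = response.getStatusLine().getStatusCode();
				if (status != 200) {
					throw new IllegalStateException("download of " + orcidId + " failed, status: " + status);
				}
				return IOUtils.toString(response.getEntity().getContent());
			}
		}
	}

	public static void main(String[] args) throws Exception {
		// e.g. download("0000-0001-7291-3210", "record", "<access-token>")
		System.out.println(download(args[0], args[1], args[2]).length() + " chars downloaded");
	}
}
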
final String REQUEST_TYPE_WORK = "work/47652866"; + final String REQUEST_TYPE_WORKS = "works"; + + private static Path testPath; + + @BeforeAll + private static void setUp() throws IOException { + testPath = Files.createTempDirectory(XMLRecordParserTest.class.getName()); + System.out.println("using test path: " + testPath); + } // curl -i -H "Accept: application/vnd.orcid+xml" // -H 'Authorization: Bearer 78fdb232-7105-4086-8570-e153f4198e3d' // 'https://api.orcid.org/v3.0/0000-0001-7291-3210/record' @Test - private void multipleDownloadTest() throws Exception { - int toDownload = 10; - long start = System.currentTimeMillis(); - OrcidDownloader downloader = new OrcidDownloader(); - TarArchiveInputStream input = new TarArchiveInputStream( - new GzipCompressorInputStream(new FileInputStream("/tmp/last_modified.csv.tar"))); - TarArchiveEntry entry = input.getNextTarEntry(); - BufferedReader br = null; - StringBuilder sb = new StringBuilder(); - int rowNum = 0; - int entryNum = 0; - int modified = 0; - while (entry != null) { - br = new BufferedReader(new InputStreamReader(input)); // Read directly from tarInput - String line; - while ((line = br.readLine()) != null) { - String[] values = line.toString().split(","); - List recordInfo = Arrays.asList(values); - String orcidId = recordInfo.get(0); - if (downloader.isModified(orcidId, recordInfo.get(3))) { - slowedDownDownload(orcidId); - modified++; - } - rowNum++; - if (modified > toDownload) { - break; - } - } - entryNum++; - entry = input.getNextTarEntry(); - } - long end = System.currentTimeMillis(); - logToFile("start test: " + new Date(start).toString()); - logToFile("end test: " + new Date(end).toString()); - } - - @Test - private void downloadTest(String orcid) throws Exception { - String record = testDownloadRecord(orcid); - String filename = "/tmp/downloaded_".concat(orcid).concat(".xml"); + public void downloadTest() throws Exception { + final String orcid = "0000-0001-7291-3210"; + String record = testDownloadRecord(orcid, REQUEST_TYPE_RECORD); + String filename = testPath + "/downloaded_record_".concat(orcid).concat(".xml"); File f = new File(filename); OutputStream outStream = new FileOutputStream(f); IOUtils.write(record.getBytes(), outStream); } - private String testDownloadRecord(String orcidId) throws Exception { + private String testDownloadRecord(String orcidId, String dataType) throws Exception { try (CloseableHttpClient client = HttpClients.createDefault()) { - HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + orcidId + "/record"); + HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + orcidId + "/" + dataType); httpGet.addHeader("Accept", "application/vnd.orcid+xml"); httpGet.addHeader("Authorization", "Bearer 78fdb232-7105-4086-8570-e153f4198e3d"); - logToFile("start connection: " + new Date(System.currentTimeMillis()).toString()); + long start = System.currentTimeMillis(); CloseableHttpResponse response = client.execute(httpGet); - logToFile("end connection: " + new Date(System.currentTimeMillis()).toString()); + long end = System.currentTimeMillis(); if (response.getStatusLine().getStatusCode() != 200) { - System.out - .println("Downloading " + orcidId + " status code: " + response.getStatusLine().getStatusCode()); + logToFile( + testPath, "Downloading " + orcidId + " status code: " + response.getStatusLine().getStatusCode()); } + logToFile(testPath, orcidId + " " + dataType + " " + (end - start) / 1000 + " seconds"); return IOUtils.toString(response.getEntity().getContent()); } catch (Throwable e) 
{ e.printStackTrace(); @@ -129,7 +103,7 @@ public class OrcidClientTest { } String[] values = line.split(","); List recordInfo = Arrays.asList(values); - testDownloadRecord(recordInfo.get(0)); + testDownloadRecord(recordInfo.get(0), REQUEST_TYPE_RECORD); long endReq = System.currentTimeMillis(); nReqTmp++; if (nReqTmp == REQ_LIMIT) { @@ -189,20 +163,24 @@ public class OrcidClientTest { final String base64CompressedRecord = IOUtils .toString(getClass().getResourceAsStream("0000-0003-3028-6161.compressed.base64")); final String recordFromSeqFile = ArgumentApplicationParser.decompressValue(base64CompressedRecord); - logToFile("\n\ndownloaded \n\n" + recordFromSeqFile); - final String downloadedRecord = testDownloadRecord("0000-0003-3028-6161"); + logToFile(testPath, "\n\ndownloaded \n\n" + recordFromSeqFile); + final String downloadedRecord = testDownloadRecord("0000-0003-3028-6161", REQUEST_TYPE_RECORD); assertTrue(recordFromSeqFile.equals(downloadedRecord)); } @Test - private void lambdaFileReaderTest() throws Exception { + @Disabled + public void lambdaFileReaderTest() throws Exception { + String last_update = "2021-01-12 00:00:06.685137"; TarArchiveInputStream input = new TarArchiveInputStream( - new GzipCompressorInputStream(new FileInputStream("/develop/last_modified.csv.tar"))); + new GzipCompressorInputStream(new FileInputStream("/tmp/last_modified.csv.tar"))); TarArchiveEntry entry = input.getNextTarEntry(); BufferedReader br = null; StringBuilder sb = new StringBuilder(); - int rowNum = 0; + int rowNum = 1; + int modifiedNum = 1; int entryNum = 0; + boolean firstNotModifiedFound = false; while (entry != null) { br = new BufferedReader(new InputStreamReader(input)); // Read directly from tarInput String line; @@ -210,59 +188,44 @@ public class OrcidClientTest { String[] values = line.toString().split(","); List recordInfo = Arrays.asList(values); assertTrue(recordInfo.size() == 4); - + String orcid = recordInfo.get(0); + String modifiedDate = recordInfo.get(3); rowNum++; - if (rowNum == 1) { + if (rowNum == 2) { assertTrue(recordInfo.get(3).equals("last_modified")); - } else if (rowNum == 2) { - assertTrue(recordInfo.get(0).equals("0000-0002-0499-7333")); + } else { +// SparkDownloadOrcidAuthors.lastUpdate = last_update; +// boolean isModified = SparkDownloadOrcidAuthors.isModified(orcid, modifiedDate); +// if (isModified) { +// modifiedNum++; +// } else { +// if (!firstNotModifiedFound) { +// firstNotModifiedFound = true; +// logToFile(orcid + " - " + modifiedDate + " > " + isModified); +// } +// } + } } entryNum++; assertTrue(entryNum == 1); entry = input.getNextTarEntry(); + } + logToFile(testPath, "modifiedNum : " + modifiedNum + " / " + rowNum); } - @Test - private void lambdaFileCounterTest() throws Exception { - final String lastUpdate = "2020-09-29 00:00:00"; - OrcidDownloader downloader = new OrcidDownloader(); - TarArchiveInputStream input = new TarArchiveInputStream( - new GzipCompressorInputStream(new FileInputStream("/tmp/last_modified.csv.tar"))); - TarArchiveEntry entry = input.getNextTarEntry(); - BufferedReader br = null; - StringBuilder sb = new StringBuilder(); - int rowNum = 0; - int entryNum = 0; - int modified = 0; - while (entry != null) { - br = new BufferedReader(new InputStreamReader(input)); // Read directly from tarInput - String line; - while ((line = br.readLine()) != null) { - String[] values = line.toString().split(","); - List recordInfo = Arrays.asList(values); - String orcidId = recordInfo.get(0); - if (downloader.isModified(orcidId, 
recordInfo.get(3))) { - modified++; - } - rowNum++; - } - entryNum++; - entry = input.getNextTarEntry(); - } - logToFile("rowNum: " + rowNum); - logToFile("modified: " + modified); - } - - private void logToFile(String log) - throws IOException { + public static void logToFile(Path basePath, String log) throws IOException { log = log.concat("\n"); - Path path = Paths.get("/tmp/orcid_log.txt"); + Path path = basePath.resolve("orcid_log.txt"); + if (!Files.exists(path)) { + Files.createFile(path); + } Files.write(path, log.getBytes(), StandardOpenOption.APPEND); } @Test + @Disabled private void slowedDownDownloadTest() throws Exception { String orcid = "0000-0001-5496-1243"; String record = slowedDownDownload(orcid); @@ -281,16 +244,17 @@ public class OrcidClientTest { CloseableHttpResponse response = client.execute(httpGet); long endReq = System.currentTimeMillis(); long reqSessionDuration = endReq - start; - logToFile("req time (millisec): " + reqSessionDuration); + logToFile(testPath, "req time (millisec): " + reqSessionDuration); if (reqSessionDuration < 1000) { - logToFile("wait ...."); + logToFile(testPath, "wait ...."); Thread.sleep(1000 - reqSessionDuration); } long end = System.currentTimeMillis(); long total = end - start; - logToFile("total time (millisec): " + total); + logToFile(testPath, "total time (millisec): " + total); if (response.getStatusLine().getStatusCode() != 200) { - logToFile("Downloading " + orcidId + " status code: " + response.getStatusLine().getStatusCode()); + logToFile( + testPath, "Downloading " + orcidId + " status code: " + response.getStatusLine().getStatusCode()); } return IOUtils.toString(response.getEntity().getContent()); } catch (Throwable e) { @@ -298,4 +262,89 @@ public class OrcidClientTest { } return new String(""); } + + @Test + public void downloadWorkTest() throws Exception { + String orcid = "0000-0003-0015-1952"; + String record = testDownloadRecord(orcid, REQUEST_TYPE_WORK); + String filename = "/tmp/downloaded_work_".concat(orcid).concat(".xml"); + File f = new File(filename); + OutputStream outStream = new FileOutputStream(f); + IOUtils.write(record.getBytes(), outStream); + } + + @Test + public void downloadRecordTest() throws Exception { + String orcid = "0000-0001-5004-5918"; + String record = testDownloadRecord(orcid, REQUEST_TYPE_RECORD); + String filename = "/tmp/downloaded_record_".concat(orcid).concat(".xml"); + File f = new File(filename); + OutputStream outStream = new FileOutputStream(f); + IOUtils.write(record.getBytes(), outStream); + } + + @Test + public void downloadWorksTest() throws Exception { + String orcid = "0000-0001-5004-5918"; + String record = testDownloadRecord(orcid, REQUEST_TYPE_WORKS); + String filename = "/tmp/downloaded_works_".concat(orcid).concat(".xml"); + File f = new File(filename); + OutputStream outStream = new FileOutputStream(f); + IOUtils.write(record.getBytes(), outStream); + } + + @Test + public void downloadSingleWorkTest() throws Exception { + String orcid = "0000-0001-5004-5918"; + String record = testDownloadRecord(orcid, REQUEST_TYPE_WORK); + String filename = "/tmp/downloaded_work_47652866_".concat(orcid).concat(".xml"); + File f = new File(filename); + OutputStream outStream = new FileOutputStream(f); + IOUtils.write(record.getBytes(), outStream); + } + + @Test + public void cleanAuthorListTest() throws Exception { + AuthorData a1 = new AuthorData(); + a1.setOid("1"); + a1.setName("n1"); + a1.setSurname("s1"); + a1.setCreditName("c1"); + AuthorData a2 = new AuthorData(); + a2.setOid("1"); 
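
The cleanAuthorListTest continuing below reduces to a one-line de-duplication idiom: Set.add returns false for an oid that was already inserted, so removeIf drops every later author carrying a duplicate oid. The same idiom stand-alone, with plain strings in place of AuthorData:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class DedupByOidSketch {

	public static void main(String[] args) {
		// Arrays.asList is fixed-size, so wrap it to make removeIf legal
		List<String> oids = new ArrayList<>(Arrays.asList("1", "1", "3"));
		Set<String> namesAlreadySeen = new HashSet<>();
		// add() is false on the second "1", so removeIf drops it
		oids.removeIf(oid -> !namesAlreadySeen.add(oid));
		System.out.println(oids); // prints [1, 3]
	}
}
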
+ a2.setName("n1"); + a2.setSurname("s1"); + a2.setCreditName("c1"); + AuthorData a3 = new AuthorData(); + a3.setOid("3"); + a3.setName("n3"); + a3.setSurname("s3"); + a3.setCreditName("c3"); + List list = Lists.newArrayList(); + list.add(a1); + list.add(a2); + list.add(a3); + + Set namesAlreadySeen = new HashSet<>(); + assertTrue(list.size() == 3); + list.removeIf(a -> !namesAlreadySeen.add(a.getOid())); + assertTrue(list.size() == 2); + } + + @Test + @Ignore + public void testUpdatedRecord() throws Exception { + final String base64CompressedRecord = IOUtils + .toString(getClass().getResourceAsStream("0000-0003-3028-6161.compressed.base64")); + final String record = ArgumentApplicationParser.decompressValue(base64CompressedRecord); + logToFile(testPath, "\n\nrecord updated \n\n" + record); + } + + @Test + @Ignore + private void testUpdatedWork() throws Exception { + final String base64CompressedWork = "H4sIAAAAAAAAAM1XS2/jNhC+51cQOuxJsiXZSR03Vmq0G6Bo013E6R56oyXaZiOJWpKy4y783zvUg5Ksh5uiCJogisX5Zjj85sHx3f1rFKI94YKyeGE4I9tAJPZZQOPtwvj9+cGaGUhIHAc4ZDFZGEcijHvv6u7A+MtcPVCSSgsUQObYzuzaccBEguVuYYxt+LHgbwKP6a11M3WnY6UzrpB7KuiahlQeF0aSrkPqGwhcisWcxpLwGIcLYydlMh+PD4fDiHGfBvDcjmMxLhGlBglSH8vsIH0qGlLqBFRIGvvDWjWQ1iMJJ2CKBANqGlNqMbkj3IpxRPq1KkypFZFoDRHa0aRfq8JoNjhnfIAJJS6xPouiIQJyeYmGQzE+cO5cXqITcItBlKyASExD0a93jiwtvJDjYXDDAqBPHoH2wMmVWGNf8xyyaEBiSTeUDHHWBpd2Nmmc10yfbgHQrHCyIRxKjQwRUoFKPRwEnIgBnQJQVdGeQgJaCRN0OMnPkaUFVbD9WkpaIndQJowf+8EFoIpTErJjBFQOBavElFpfUxwC9ZcqvQErdQXhe+oPFF8BaObupYzVsYEOARzSoZBWmKqaBMHcV0Wf8oG0beIqD+Gdkz0lhyE3NajUW6fhQFSV9Nw/MCBYyofYa0EN7wrBz13eP+Y+J6obWgE8Pdd2JpYD94P77Ezmjj13b0bu5PqPu3EXumEnxEJaEVxSUIHammsra+53z44zt2/m1/bItaeVtQ6dhs3c4XytvW75IYUchMKvEHVUyqmnWBFAS0VJrqSvQde6vp251ux2NtFuKcVOi+oK9YY0M0Cn6o4J6WkvtEK2XJ1vfPGAZxSoK8lb+SxJBbLQx1CohOLndjJUywQWUFmqEi3G6Zaqf/7buOyYJd5IYpfmf0XipfP18pDR9cQCeEuJQI/Lx36bFbVnpBeL2UwmqQw7ApAvf4GeGGQdEbENgolui/wdpjHaYCmPCIPPAmGBIsxfoLUhyRCB0SeCakEBJRKBtfJ+UBbI15TG4PaGBAhWthx8DmFYtHZQujv1CWbLLdzmmUKmHEOWCe1/zdu78bn/+YH+hCOqOzcXfFwuP6OVT/P710crwqGXFrpNaM2GT3MXarw01i15TIi3pmtJXgtbTVGf3h6HKfF+wBAnPyTfdCChudlm5gZaoG//F9pPZsGQcqqbyZN5hBau5OoIJ3PPwjTKDuG4s5MZp2rMzF5PZoK34IT6PIFOPrk+mTiVO5aJH2C+JJRjE/06eoRfpJxa4VgyYaLlaJUv/EhCfATMU/76gEOfmehL/qbJNNHjaFna+CQYB8wvo9PpPFJ5MOrJ1Ix7USBZqBl7KRNOx1d3jex7SG6zuijqCMWRusBsncjZSrM2u82UJmqzpGhvUJN2t6caIM9QQgO9c0t40UROnWsJd2Rbs+nsxpna9u30ttNkjechmzHjEST+X5CkkuNY0GzQkzyFseAf7lSZuLwdh1xSXKvvQJ4g4abTYgPV7uMt3rskohlJmMa82kQkshtyBEIYqQ+YB8X3oRHg7iFKi/bZP+Ao+T6BJhIT/vNPi8ffZs+flk+r2v0WNroZiyWn6xRmadHqTJXsjLJczElAZX6TnJdoWTM1SI2gfutv3rjeBt5t06rVvNuWup29246tlvluO+u2/G92bK9DXheL6uFd/Q3EaRDZqBIAAA=="; + final String work = ArgumentApplicationParser.decompressValue(base64CompressedWork); + logToFile(testPath, "\n\nwork updated \n\n" + work); + } } diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java index b7be5e5cd..2fe00bd57 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java @@ -1,20 +1,44 @@ package eu.dnetlib.doiboost.orcid.xml; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.*; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; import 
org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.orcid.AuthorData; +import eu.dnetlib.dhp.schema.orcid.AuthorSummary; +import eu.dnetlib.dhp.schema.orcid.Work; +import eu.dnetlib.dhp.schema.orcid.WorkDetail; +import eu.dnetlib.doiboost.orcid.OrcidClientTest; import eu.dnetlib.doiboost.orcid.model.WorkData; import eu.dnetlib.doiboost.orcidnodoi.json.JsonWriter; +import eu.dnetlib.doiboost.orcidnodoi.xml.XMLRecordParserNoDoi; public class XMLRecordParserTest { + private static final String NS_WORK = "work"; + private static final String NS_WORK_URL = "http://www.orcid.org/ns/work"; + private static final String NS_COMMON_URL = "http://www.orcid.org/ns/common"; + private static final String NS_COMMON = "common"; + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static Path testPath; + + @BeforeAll + private static void setUp() throws IOException { + testPath = Files.createTempDirectory(XMLRecordParserTest.class.getName()); + } @Test - private void testOrcidAuthorDataXMLParser() throws Exception { + public void testOrcidAuthorDataXMLParser() throws Exception { String xml = IOUtils.toString(this.getClass().getResourceAsStream("summary_0000-0001-6828-479X.xml")); @@ -26,10 +50,11 @@ public class XMLRecordParserTest { System.out.println("name: " + authorData.getName()); assertNotNull(authorData.getSurname()); System.out.println("surname: " + authorData.getSurname()); + OrcidClientTest.logToFile(testPath, OBJECT_MAPPER.writeValueAsString(authorData)); } @Test - private void testOrcidXMLErrorRecordParser() throws Exception { + public void testOrcidXMLErrorRecordParser() throws Exception { String xml = IOUtils.toString(this.getClass().getResourceAsStream("summary_error.xml")); @@ -42,7 +67,7 @@ public class XMLRecordParserTest { } @Test - private void testOrcidWorkDataXMLParser() throws Exception { + public void testOrcidWorkDataXMLParser() throws Exception { String xml = IOUtils .toString( @@ -54,8 +79,7 @@ public class XMLRecordParserTest { assertNotNull(workData); assertNotNull(workData.getOid()); System.out.println("oid: " + workData.getOid()); - assertNotNull(workData.getDoi()); - System.out.println("doi: " + workData.getDoi()); + assertNull(workData.getDoi()); } @Test @@ -64,9 +88,6 @@ public class XMLRecordParserTest { String xml = IOUtils .toString( this.getClass().getResourceAsStream("summary_0000-0001-5109-1000_othername.xml")); - - XMLRecordParser p = new XMLRecordParser(); - AuthorData authorData = XMLRecordParser.VTDParseAuthorData(xml.getBytes()); assertNotNull(authorData); assertNotNull(authorData.getOtherNames()); @@ -74,4 +95,43 @@ public class XMLRecordParserTest { String jsonData = JsonWriter.create(authorData); assertNotNull(jsonData); } + +// @Test +// private void testWorkIdLastModifiedDateXMLParser() throws Exception { +// String xml = IOUtils +// .toString( +// this.getClass().getResourceAsStream("record_0000-0001-5004-5918.xml")); +// Map workIdLastModifiedDate = XMLRecordParser.retrieveWorkIdLastModifiedDate(xml.getBytes()); +// workIdLastModifiedDate.forEach((k, v) -> { +// try { +// OrcidClientTest +// .logToFile( +// k + " " + v + " isModified after " + SparkDownloadOrcidWorks.lastUpdateValue + ": " +// + SparkDownloadOrcidWorks.isModified("0000-0001-5004-5918", v)); +// } catch (IOException e) { +// } +// }); +// } + + @Test + public 
void testAuthorSummaryXMLParser() throws Exception { + String xml = IOUtils + .toString( + this.getClass().getResourceAsStream("record_0000-0001-5004-5918.xml")); + AuthorSummary authorSummary = XMLRecordParser.VTDParseAuthorSummary(xml.getBytes()); + authorSummary.setBase64CompressData(ArgumentApplicationParser.compressArgument(xml)); + OrcidClientTest.logToFile(testPath, JsonWriter.create(authorSummary)); + } + + @Test + public void testWorkDataXMLParser() throws Exception { + String xml = IOUtils + .toString( + this.getClass().getResourceAsStream("activity_work_0000-0003-2760-1191.xml")); + WorkDetail workDetail = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); + Work work = new Work(); + work.setWorkDetail(workDetail); + work.setBase64CompressData(ArgumentApplicationParser.compressArgument(xml)); + OrcidClientTest.logToFile(testPath, JsonWriter.create(work)); + } } diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java index 1f77197ab..efe01522c 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java @@ -21,8 +21,8 @@ import com.ximpleware.XPathParseException; import eu.dnetlib.dhp.parser.utility.VtdException; import eu.dnetlib.dhp.schema.orcid.AuthorData; -import eu.dnetlib.doiboost.orcidnodoi.model.Contributor; -import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi; +import eu.dnetlib.dhp.schema.orcid.Contributor; +import eu.dnetlib.dhp.schema.orcid.WorkDetail; import eu.dnetlib.doiboost.orcidnodoi.similarity.AuthorMatcher; public class OrcidNoDoiTest { @@ -48,7 +48,7 @@ public class OrcidNoDoiTest { if (p == null) { logger.info("XMLRecordParserNoDoi null"); } - WorkDataNoDoi workData = null; + WorkDetail workData = null; try { workData = p.VTDParseWorkData(xml.getBytes()); } catch (Exception e) { @@ -105,7 +105,7 @@ public class OrcidNoDoiTest { if (p == null) { logger.info("XMLRecordParserNoDoi null"); } - WorkDataNoDoi workData = null; + WorkDetail workData = null; try { workData = p.VTDParseWorkData(xml.getBytes()); } catch (Exception e) { @@ -136,7 +136,7 @@ public class OrcidNoDoiTest { if (p == null) { logger.info("XMLRecordParserNoDoi null"); } - WorkDataNoDoi workData = null; + WorkDetail workData = null; try { workData = p.VTDParseWorkData(xml.getBytes()); } catch (Exception e) { @@ -179,7 +179,7 @@ public class OrcidNoDoiTest { if (p == null) { logger.info("XMLRecordParserNoDoi null"); } - WorkDataNoDoi workData = null; + WorkDetail workData = null; try { workData = p.VTDParseWorkData(xml.getBytes()); } catch (Exception e) { @@ -308,7 +308,7 @@ public class OrcidNoDoiTest { if (p == null) { logger.info("XMLRecordParserNoDoi null"); } - WorkDataNoDoi workData = null; + WorkDetail workData = null; try { workData = p.VTDParseWorkData(xml.getBytes()); } catch (Exception e) { diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/0000-0002-6664-7451_work.compressed.base64 b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/0000-0002-6664-7451_work.compressed.base64 new file mode 100644 index 000000000..7e5a73b73 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/0000-0002-6664-7451_work.compressed.base64 @@ -0,0 +1 @@ 
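
The *.compressed.base64 fixtures added from here on hold whole ORCID records, gzip-compressed and then Base64-encoded (the leading H4sI is exactly the Base64 form of the gzip magic bytes); the tests above read them back through ArgumentApplicationParser.decompressValue. A JDK-only sketch of that decoding, assuming the helper is a plain gzip-over-Base64 scheme; the class and method names are the sketch's own:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.zip.GZIPInputStream;

public class FixtureDecodeSketch {

	public static String decompress(String base64CompressedRecord) throws IOException {
		// MIME decoder tolerates line wraps inside the encoded fixture
		byte[] compressed = Base64.getMimeDecoder().decode(base64CompressedRecord);
		try (GZIPInputStream gzip = new GZIPInputStream(new ByteArrayInputStream(compressed))) {
			ByteArrayOutputStream out = new ByteArrayOutputStream();
			byte[] buffer = new byte[8192];
			int n;
			while ((n = gzip.read(buffer)) != -1) {
				out.write(buffer, 0, n);
			}
			return new String(out.toByteArray(), StandardCharsets.UTF_8);
		}
	}
}
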
+H4sIAAAAAAAAAO1c63LbNhb+n6fA6EebTE2JulpyYnXVpE2a1Jus7V5mO/0BkZCImCJVgLSidjqzf/cJ9oH2TfZJ9jsASVESLWdsddNulJlcDJxzcO4XEJMnn7+bhexaKC3j6LTWrLs1JiIv9mU0Pa19e/mV068xnfDI52EcidPaUuja58MHTxaxujqhP9g8TRwgYK/Xb/Z7TbdZY3OeBKe1hotfDn63nF6v13GOO91mg3AaK8hrqeVYhjJZntbm6TiUXo2BpUifyCgRKuLhaS1IkvlJo7FYLOqx8qSPP6eNSDdyiBxD+KnHEyPITSgFSI7jS53IyNuNVQIq8MRcCZAS/g60AibHipNAKCfiM3Ez1gomx5qJ2RgWCuT8ZqwVTKENpWK1QxO0ncN68Wy2SwF2P4eGULHaIbfdz6HnYCuGlRxfJFyG+ma8TcicwpVYLnYemAEUks+AvUNy2i5g31kfcqQvokROpNils23gnM4kjWzM3ISbARRaUWIiFEJN7FLICijH476vhN6BkwGsouhawgGdeazlbiffhMwpUMDejEW7OWSAMInV8mbgDGBlp3kYL2dQ5S5j5TA51s8pD6H62yJ9DSzH1UJdS29H8GUA6757m8cWtkGGgA7lLpOuYFbRpAVXHgV9qna47TrcikP8rMS1FItdbBZAOd44DXdYlXY3+QMBHadql/a2QGvDBwy/ntj8ceIpQdnQ8fHnsOW2UByaTtu9bLVOOv2TJqpPx/37k0YV9BqdkOvEmaFIIQLL1Jqu02pdus0T1z1xe/VOu7+iVoGzRtMybNe21x0vlPBBBP4KogyVKjkkrWioZaUSi9QYvXnjdH948bfLL1vtN98evx5dXA4KvgizkiTV0OFOVANRiRvEOhkWfBQIZnklYeNWETeUQEVp+ApZ7FPNnsZhKKaCfRNHfhxt0jKQDypOyRZN+5DIJKzQuF2+iD3JQ/aF4jJiX6W2+mLhjCepMkHNsPFXsRjHKmJfRxMeJZp9L5OAoVsx/4jThHH2FZ/JcMle2NzD4gkbpYnUM3YxF16i0hl7JjWqh1AFqyXGnjQ2WbW8v4U0VAnsxsvR2Qi8JKYhiuciytDWoUroOohVgjqnPSXnJMzwkzB5PP9kmjz+ejbHHkfSP2HfBzxhUkNShD1lZxYrxr2fU6nwb8gfiVSh97oWYTynJAkFeTCISeCa6dSDNjTjVmCdC+xnArOHo4tnj+iAKCZVTeQ7OiJNoAdxxMbQn4x0IrhPMJxdp2EkFLf9GktiLBU0odcEtkr0ERO0CONB69paEVGHVJyGlPfq7GtbPZdwJIZmh41lHMZTpOqQzYQX8AjM4jhtkEnoBVl1/XAljBI0C+P4ighBTOQeHAmtIPELWkApQ3cZkihiEithTzMeBXl0wOcgPl4SXBLxZOP8yEcoGxTxDolemjpMcobI4DjRcIVtLTLJ62wUyRmo6CT1ISn0P50KnQAIZtSp9gRsvdJehfFyy+B4JTVILAIRsamIRCK9nCWBSq3iKEMB3JVmE8sqeCnZn4foV6gZp7bFsK6XkRcAN051poisIBm9kawkqdUF/Sv2rRskKN0sgEojsKugTnAl3iGyIuuHQTrj5I0I0QQmJmduGG8u3Pr1+K2go+DVlzEZF00KSUfdrmU0slENLiercJ+twp3Yt+5kOfek8lKo3fjmhrPAl23YB6Wwv3hmQ8akjEomnwktp9ERuxAJGv7pkUklb7iC8uWcEswJMo1VhhdTCBtTG+rtXiF+xkJkebFZqJKdoxUKukOhFrAoJJ5aa1MRjSgPMDjV1Ph4wi4SdhnEM1jiRaznkuwEmWwSPmJfRtMQ5x6xVBt45gtfmgkkO6lQXk5SLxHfMxg0WZBNX6aRYK32EWu5za4Vf5ROU/hw06z160hza1IiaShNqWyqhADPIScj203S+MPzzx4ZOmRoG4V5JIfC5BBKTiSvDSIDu6bJSgU+PHcesQUo4khPpSY3ZjFgbVJnFyVfp1CD7GVnt3pQYmpCJZTRFUiAn8zHch9kC07Gns05Um6Vz5wRmdc2Z1ruzwTXKax3ws4z6vhhjr8pFxkut84gQbQIESG5Bxetv82zZjbWAXZnGI4cjthYaqlzzbKQ0shmhBfiEkVwKbgXZBIbsVINelQfQNSwbLJb7JVYswUlEiXF8YwEtuCJMSUn2slZqrPnKk7nJudnw8sR0UgUOgZyOaMA8Q7ehfYBLj2WKgmKn7THI+t4U0Pm3/8yO2bW54YlkDP6yvNPlVHOhUa1gQUuoZuJJF7R8qFciYR4AZummE5Ys8/OPwN12z48bLYRf6F4DIX4EhntR8WjqfjJVAjkW41SR25UZrXTqg/a7MeOW3ddp9Op93s/gT9xpa3b0wHOfQ/ouuzH9qDeGtAB3X5+QDkYg9hqBdIEqNeUx8z4EyUmaqaUZo2TbNWBzQqgAJwYhqgAKLiClrDZjD1M/vOPf57id6ve6T9mb7Kf0LVbUUMxAR4Kl7B9CKVNsFagteuD3jpandIpJlZTr45sijCeycsC3OgJuV8T1zzK2NViSpXRNCQmMCami0lDXubEbVcI4ME9AZeIEvNWGzn1E1Yi4ZZJgJ45ahuyVe83NyA3VFyGPT6uoloJ2u2ugVptrrz56DZ7+4JGLMoBMRX19oBSTadrnevTbZc8onpNGNXkstNklFOFZUqub84w6RmzQdZcVIXu0zjywlTbBgZGOUdavLbt8EWl1+q8GfSZj2kKGWa9aVilMkRClsxMQTTtOvLVJdVzW8gncWoSKrXdRatguxvoM+DXtqzeUvOMB290JFshuDvPkuT+Uq9LYlx/JYG6obrMVQzXNR2APdWx3X5WdWAQRLMhWtJ/NrFsDyalqcVDv7Fa2153kuVcDMdynIh3Gb31rZvwrnmYiuFfTKMVil87/nG33ez1B72+3/EHYtxqdwb+2D9u9pu+N3aPQMeMVIbWKat9gGGxRkzwMaIDnmiYOAxuh8Htzz64/fGmtMNIdhjJdo5kh/nrQ89fh2HrMGwdhq0//rB1mKz+h5OVnQ9S1EqVDSkv0Vsm7KnkSqF6c8PIS8ooaFzZ60/PoGgvQCuccJC2BuIhYhIjx0wie19blGd8gj6XfUGdQyjM0jeph940Zk8NN7HzHHnOt1ujCBxES/ZGIcLMypczMPwiBffWCy4SIaOFQGf168sYrERYfxXyVP+WcUhrnL1C6uQ6o0Bl/41QympztBRoydlLfk3lDAvfhdwHz4qDeIwKFIiM93MevYUORldxKK64sudTqQ7Yd9JLYpUdqcU8YC/4WzKekVl4aKLYWarmwTLTwrUEJ/6CK99ydYlaeCXZCIIG0qw8p3YCzdOZNwqpbTMmWULDLJ8b0T4NzOoM9THIVvlc0ZIfS1YANt1603Wbjbcc/mrdmz7z1YlAvdnv9Q0V8DhNKW0SCjV+6BjMxnUcpjORH2qWsk+DmWtsfj80IFLraMVq97jjtPtu12zl7YiirREsSrkbjY9vhrFRFiH08oGgo5QeB2WEOlj6bXM6twN4+Yvn+qyffbClGT7/pp
kN6/kH0mK8L75fm9dclvzqc3sZgkwxJA0WH17NyhacMc7Q7RRgdmELzufLodstoOjH9U/Q1Szl6KXXPXqbeGm3+pt7CcBedmSfwkk9WCuY2IK7lZo1Tn4p4tCtiEPXIg7dizjli5HKQ0q23XVKRKkrlL9Qy438oaV5l4N6JGp3P3tF9HYGbLZHug3kfIhmfFJJcQ1q+y1DpZnubsP5bA+Wa7uDbrPZ6/xe1tlJ/89uAbEHA7Qc3aq7Tr/r9jrtVrvd7f5epnjPk/7sRkFtvLdRbi2pv5eN7nbwhzdZ1Y5eL2GpCotnaFdeOEdrVcffde7V06uGuZ4OGyJqlAqhbtjm1TGXL86qa3ZWHbKDjaxjd7IJw6HW20GX5WT3QQ537H2Qk90HOfHEsffXTn7X7OS3pA/fp6A8qgfJLCw9lAvXvkXQjYYcpziqXK0396qNVQJwzDO5dbB1ldqXfWsP+/KH7U3neNBpOt1W2y3xKW+mZp7s7cKueNPXeD+mM9ExrMnEvr/bHDjO4uiXOH+aVgasolM6jCf2n0JXCLYFrdDbD+3gkx+1ubsh33sduA32wazecvpuu+30Bt0dzzhvtHoV9l6tftNIeTD8/Q3fG7htRO3gLuFehb1Pw2/eFhzsfV97t52WOzh2BseDH+5g7yrsfdp7/SLoI7T2lsDV92AHzYjh2jXgQSFiWLoF/QjVsfe62G73eo47aLfuVBe3sffaELFXxSX3R2jrigaxfKN/0Aglg+KDxkeojr3PxL1O59jptbp3aZqqsPeZDMrfqj5CW28JXPWp7qAXGqbWvlR+hCrZe4/QbTc7znGv1btTj7CNvc+0sPYR+mDs+xu71Ru4Trcz6N7J2NvY+70hK70vOBh7D7di+f/ucrdbsS3svd6S2Kcjz7PHIwdz3/9SrNOnTxdu7y6JvAp7r/1ddtGx9j7oYPQ9TPjdrus00ZzfbcLfwt6n0deefh2MfX9jdzq9ntNqd9p3MvY29j6Nvfmq7//M3tvrG9/480eG5j9dG4rVf72yvvEgI0R/DB/8F4+Tql7oTQAA \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/0000-0003-3028-6161.compressed.base64 b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/0000-0003-3028-6161.compressed.base64 index 8dc3d32ad..34de6ba16 100644 --- a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/0000-0003-3028-6161.compressed.base64 +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/0000-0003-3028-6161.compressed.base64 @@ -1 +1 @@ -H4sIAAAAAAAAAO1dW5fbthF+z6/A2XPal5biRaREqmvlrG+JG6/t4900bd+4JLSCQxIqSa1X+fUFeIUkAiIlkpFs5jRxTQ1mgAEw881gQF7/+Ox74AmGEcLBiyt1pFwBGDjYRcHji6tf799K5hWIYjtwbQ8H8MXVBkZXP85/uA6hg0N3lv4BVna8fHElK+Qfifw7lsaKZkoTdaJeASIgiGYoiGEY2N6Lq2Ucr2ay/PXr1xEOHeSS/z7KQSTnFHkL6K4dO066xWtSkORtXBTFKHDErRiioh1ckZHYMXQFzQqavBWOlzCUAtuH/FYlTd7Kh/4D0fcSrfitSppCG2GIQ4Em6M85rYN9X6SA9PecOp1CPnX6e069It3CZJYkF8Y28iJ+u13KnMPvcPNVKDAjKEbuk9aCkdOfC9rndA1JyIVBjBYIinS2T5zzWayDdAfw2mYEhVZCuIAh2ThQpJCSKG9nu24II0GbjKDcRU+ILEBphSMkXuS7lDkHotnf+a3orznlkmwTHG74xBlBOU8rD298okrRZOU0eav/rW2PqP7QTt8iy9tGMHxCjmDzZQTba/fQii3mhlgIokMkmtKSptxNEbRDh276dShYttt0ZQ/J30P4hOBXUTcLorzdw9oTzCr9dbd/hEGE16FIe3ukV/MfAPnnOrUfs4SY2TzpryzFOkRzyj0i7EvWFV7iWmZa7LGh3mUuapUQ7DVb4iieF2IL4uRxOhBZOJJrZsOyO5yRxFJ42LE9OIfBtVzxOBMoZHmd7ah86zGC8l+cECZbQPJhvMTu/DZxFFLCKYTutcwj3GcVrR98FFG/L7nEq801RdUlxZK08b2mzDR9NlZHlmX9t+S522JP454dxZJPwANRoptz1RRJVSV1eq+NZwZhrIx0TflvofuKNhXD9mzkQ3ceh2vIjDF7uk9PAE3KL/EOO812fhS0XoXIt8ONmMs2UTbPlTN5nRqYzA4JQFNuiWpqWDUlZSqpk3vVnCnaTLNGxsSqqeGsSxSggCcUoQfkoZgY/dX6wUPOVdbJKmBXMmE7mKw7pmsTSdEl1Ugm35ypxshUpmXXtqgr+VUPWMxVNGBm0CU0mT2iJxgkKC2avwwJ2sV0F4uoDjBc2D7yNgnt/PWacIwr+LFE5YzIzJQwj0sgyeDOSLSIGLIrmeG07Xp2PJaQ4w7pFtdk+adgTcgjxWtsywzj5GBIPKgcELEMMsCYI0th+5xmu+/7SLAKSorHVUHP2SNtb+ImYwCrdSyR+I74fVUxjYkyuRLs+9ojlQtmJLpaefZGQoELn4nl2NGByFaINcC3FV3rluWfIqH93/dpJMdDRD9ES9XUbItqoJQyKOZAkwzL1CTTMsfVeInHfQs/VXHZxk88Ngfx1F5DuZFCdtSX2L87B6/WEZDAGy+iiDfc5bltJavY2cSkhAkUwiF6RPQP5/g5qQ1ea03GYTDb/mQ00QdXh4naM08JcgcnJN7fUfKBLZULZ+yNFG9WxaK4WRNkG4J3rwtOe5S1eD7Z3hrO9SmZBFXVp4pSyS+lqsWQ+MY5E1RFSXdHhJBE5V/t0JXtpOevUxgwIuQ/pk/evX7BdOOvtr/6x8oO4wDSX24/mPcfbz7fVfaOiqzVtxB6SVxAc0vzCHqLSnZbVNt+psr8VzkaFtHU9a9FlMTi5OxhGWozkbkUrX0KvoWIoYzRj49Y1Jrwku0mk2cUIgeWbhsYlbyKTKcYgxRUTZAHO1zdmmnaSB2bDZAHOzOLBcERaeD5GOL1qqGjPrErnEUfyRVkha5K3ZarqcBI+tTSLGMP1ahigJQzlPPmFQhLbHB3oREbmVsUwChjvS406kPrrAwRRNqnO+SO2RYtu2SW9YlumWXV2DUnjeVGWqnCShx3fBgoHXLErEAXUo9EM7gpx1dL6BP7FW4KLrsUQnYh9qAUo9iD80/L0pEzj8VLLSaiBEuSpd2Q0J
VupXJKkycH25F/6dIwi2bpg4PtXHsz14xSLfbmkPoPDKawbIFoF1YN2TxqyKp2zJDVJkMWD6VMND/aAfojMamHO5Esul8DlBxqxhuAF+C3DfZRUG5F/rpkGWWphnqb3iGi5u/t0PYRLO0yfVireQgf6eB++0+5BdIn9YTjdUC24PzXEhzmjw4bnIPDLGYXRbb/gB7Xia+pNyn12rOwUdfVSbmCajVpKj1x9amt+/zuw08/fXz/ukoiQ3ZYi02Vw5w9iEivZQFO2UXm9YFYm5htC5uY5H8j3TD+dMymVWSiGmA2rWXMtq+XEzFbC1pnZQyYbcBsR2C2l7azhB4OI+pl7xxEDzMBCsC+hA4RnaL3ieiUc0B0ynGITjGOQnTm+SG6e/hsR8COwc2aJk86R3Y7YhoDu/t/f/fATtXGY2VAdiVZ68hui67MBXIyikUp1oHj2oLs2JxiwxTqbkfbzCke0RWW/0F8WiiLUW8FQlXHmmFYk8Z5xYKnXHI4FaYeUE+LqcUjlc/KGGDqdwRTPy03EXKIlw9ccEPAJA6w30KakejKxU6MQ9sDn7OCFsI/wg4xMrAPwKpax6E3/Rj0pqgdANaT8dvH3z17iX27c+D2AYe+fQJw+/jLmQG3+vBC5IzaABglMqhGGFmNvBBe5DS8c8/dMnYhsz1iHtfyeoWQH0PG48TUYgtZsXQ8Xls17kJu25Q8fnv127Vq+0pqHt+sql7ILafh8aAXDYQMEoJG9XMWrQlVjHuFwoyZYY0svUn9HNO5o7Kgp4ln+bMo80DeoKQ8bDv3imNcjBpUzojYpbUxqjJSVd2StZGiqKqlGF9g1KSURiQhoGbcQ39AF8QEgkQouWZCK7Kv6sstmTQTntfzUGxIlJYgw9pCKytzRNJOrdApeFc0q/ITtVbdNd2Pya6tCGeMiaqa1tgSBi+0rVxSVtWc1igdZZW2m28X78BT8+2n729WRsNApkYpKduqVjmpKlmmqUuqpdYsJ2UlbMU0VZx6jmka1NqyQcurEEdRCBdtRzTJbhEED2x/UrI77NkheLW0w0di4z5DFz+Dtx7+Cl7aMdmYmxm4AfckEoI++LTEMYYedEhI5NBEu0MimZvVKsS2syz6Jwpd5EP9q++KONT1pr4ll8Rj2a5b4knpzTXxOtCde+JJbMtFJfxruSke5QGA1Fg1XHVkW5quwy9k51PpJFRGjgfz3cRdooXNTNxcGli1ny8oLW8tMel48qGkpuCf6d+S85UlBG92TMwddhCMN9l4t5tW4Io9xCCIOQ+UKBwFyLfvfOiTkTYdnzcgh8htFZBrUkQWuWQaim7qk4nFrKMO4XhNqceC8X7hcXuT20m0pVjyp3/dvRqR5T4dmePJZKKbvYVcRPjqKXIOC7+MqW4jEiLrnRgefXw4EiopO4iExKbv1EjodMPKyjiDSKjxxTpWwomX61hWf2YkdMJFO5Zl08t2bNvv4Qju2MuIXVzA2+pYy+FsEqM+YS+2kQNCGK2IfYYUWAZ2TDrreRuAHWcdhih4BBEFmDhw1wTx0b+uvQVyIfBRAAlpCc6HkLYL388T1a//5/Xizwk1Ob05cgkch+t5LNvE9jwZPeF7nviLyy9wb/qmL09LrviSVfBiX3eHLvnWSUgQ25m9SEha2SvqujvPSNSpP20hI0FNC3j35s0boOvxEmx5lbsVdBBZYVEMXhUK+DugNhHQdpebpzD1885TdBHKKupE/jKKsOfbMfUoxkixRophHGPVjvFmjeRfRkDb3oR3lZjSLd0YK5apTPtMTB2QehmT22K2wqidraguN20/W2G2d7e9BfPKyhiyFUO2YshWNNbrhWcrwC8wIlYWxRA44SaKh6REDyiOJ613JMfryJCa6BLd8WT0hPB44r/51ESpuzZSE/3XSpQvj+g0M5HWa70hqnncgFubegfiFZK7KlkpF/Sol7jUHMSk5iuad/lfcg6CTalPRtOpQZxHL5HpbjpdLPwyAtT2prq7shhlbFhT09T7mOOaUi9jclvMPmi1sw/V72RpP/vAmr72sw9NDSsrY8g+DNmHIfvQWK8Xnn0YaiXarJU4CdjxRPUL7ni9GBISXQI+noyeQB9P/DefkCh1d6G1Ej1lJMoiCPA5/SojcSHxEqb1E5zSiYipnbjcZMW0hXeEX2IEq1uqppCtofZSJlFT6mVEsO1NbtuZqMxGus8j9sba7qmGTkDFSNG0Pia+sfxTLtpXMvwOLtGrE0U3FKvG1ZGSsoN0iJFcc58mxnU607XRpP43mFi+1abboKZb05OX1hozXR0Z4/7SIUN4nTSWG2mlKv/R7dvAmgfQb+2HMMNMFPO8cu8hiZ2JEcIusMPQ3kQAP8EQePRyPXkCbYKLCPJ4XNK9RncssXrZhXoX5m8PAgscArhYIPpa2xhEyakOTA98CIrMvpk9BNz9+y+e/N59GK8jx/kxHrfLjfCS90G7iAR2cRLoZU/k7DRYXiEk3ymWNlU03VR1RZko2rhRONf/+bLRTzRX73wZtBSzlb0QH9botQ9r9L4utpqdXmzt7zNIw2HNbuPhsOYw67NBk9/LYc2ANc8Ja353yHI44ukyI8qT0VNWlCf+cgOA2jWnue4us+b0rGKCSznG2csFTvXJeR/j2OEzeuoi1083y5cHJ4XwOIgphtfMqazJ6niijhZrzxut3MXflrHvtZ/wl2AYEvixZ9nq2SnaNgkS56bCvGueed6Ajw+jyH6E81d2EOAYFH0E5TdhQDJWkMzF7CT9bUlsZDD3lPd9HHNwenEGZ2YJBpvqMl0EtyrBXvQGg97L6+5Y0f4B0cO52NHnYmbtczGz83MxazZWR4bZ3ldyWvCFrIzhXGw4F6Nkb+zQ24Abj+y97JsSeAFIJ+kHGglSXDNo8mfsQ6IeO0IR/X7jz+uAlpfGdDAognYEwS3ZjUN9aaeuhyeoT/fD68O3cKTF6c2Ri+nYaIDHtNeIgNeJE6OCLbYnRgb7vM4wOkg6eVqEwJuKb2HHnZ5DEkyc/RCR1enEDNY4x+RQmQTu6+XtLxH28GPyznbmQ8yXmhWyWvhY56XFsuOpKmdTNVrhANJPFZjj8eQYU38Momgkf4hqj4xqVUX8vdcyqk0ou41qVbLh9JFptvii8NP3MitjiGqHqJaSJfEsMVyv8cqm1x7T8w8YYhKtktD1Fsb2A/ZQ5NNA9pY8pef19BLlTbTxVzH2iRFwwOeJrIFb5JQffB0i2o4cEE9a706I15EBae8U6xLN/0V7K9T9eWJttQbWZtv56YeVy5o2n/9h5RYw+qf3+A58/PDmYsG4qZ35N027PaL1wq1Y1NBkS1anujac0NbKwTRV33BAKxzkpQW1qqXLdA3cKupUnfb3napMrC8QO4SwR4ewNa4ElJQdh7DqTJuOpnqLVwJO94CsjCGEHULYpJAPPpNlv8jeu5Acy5LpCOj+osHs78gN4AY8evgBxyGycbTxHBiSaNdNo11vE2Gp+mcS89IS9Q3wh9i2Oz/EE9KXL+LJ/xYiWU5vzvaUtruggNeHb/aQtpsAIenjcEbb4Rktd
94u5Ii2Ttqo3SPa92iFXPAZRkSes+whH7T1G2WRTfHW8/L/lgKus0sbs/SP+Q//BxvQAv4zvAAA \ No newline at end of file +H4sIAAAAAAAAAO19S4/jRpbuvn9FoIC+0w0U3w+RuuU00plluxouV05llu9gNokQGSnRphgaksqyauX5FzPADDCz7IUXc3txgd4M0LmdX+Ffck8ESYnKFClKKbL0CMNVlSmdiBOMYMT5zonzePXlz+MQ3ZM4CWj0xQtNVl8gEnnUD6LhFy8+3HwtOS9QkuLIxyGNyBcvZiR58eXZ717FxKOx38/+QROcjr54oajwnwR/DMk0TFcy7N4/vEDAIEr6QZSSOMLhFy9GaTrpK8rHjx9lGnuBD38PlShRCoqiBfGnHk75sKqazEmKNn6QpEHk1bcqEc3bkQk8CU6JX9NsTlO0oumIxFKEx6S61YKmaDUm4wHM9yiYVLda0MxnI45pXDMT7OuC1qPjcd0EZN8X1NkSVlNn3xfUExgWhVWSfJLiIEyq2z2mLHr4icw+1jLMCeZPPobWNU/Ovp7T/py9Q1LgkygN7gJSN2dPiYt+7qZRtgOq2uYE81mJyR2JYeOQuglZEBXtsO/HJKlpkxMsdtF9AC+gNKFJUP+SP6YseoCZ/am6Ffu2oBzBNqHxrJo4J1is0ySkszFMZd1iFTRFq3+a4hCmft1OXyIr2iYkvg+8ms2XEyy/u+ve2PnawAkBcxjULemCZrGbEoJjj236aVzz2i7TLUYIv8fkPiAf64Y5JyraDaZhzaqybx+PDzpI6DSum70npC/Ofofgv1fZ+dHnxKXNk31bppjGwRnrPYHuF12vkBKvlFKLJ90w6XJW14oTPGk2okl6Nmc7J+YfZw+i1D7Jq9KGLe/wEqcyRUg9HJIzEr1SVnycM6zt8lW+o4qtV2JUfOPFhG8BaUzSEfXPLgM4ntNXStX3T3tIpoNxkDBxL/kgzM50VbMkTZNU+0Zz+6re10zZ7un/uOjzcYsnEx3iJJXGgBlg7vyiV11lveo661WDXh1Z1+1/nE/5ijYrnjbEwZj4Z2k8JaVnzD99Sg84JuuPC4VHzR59WdN6EgdjHM/qe1kmypd35QK+ys6V/PipwUrFAVR+wbKmDD+g+yAJBkEYpHAmT6aDMPBe5J2twl2LTspLxd+PxSJptqSakmbfqCpbJN2Ue5q6WKQl6pX9rVz6Nb3WLX3poRfIoT8M7knEQVRydhHiqR/QV0/wxRLVmg7v8DgIZ5z27G0Q4mhVf2WixYoopSUpfbzAeSVYmNQtdonshVLqaVkyPBIotT0+Il3qlb+mGZaq7SODU+WWOQQpsEr9QxV4pdxBjucK4FfbvqBZHvtToLYK6dU/1wr6gsl8TbN/8g8Xsr0EBaRkOmbbvnYOFqBg47PSgYOyb2iyrVsNz8rSMEuKTe1clOmWl3rR11y1qp/VOVVlP3PEt+a1m5NVnFw1E6eZNxqcNHbfNGXL6G1w0pTX+O4OztZMeA5jOp0skzYcitpjQ1HdvuXKtqM3H0q5/9K7migryBaTVZrexZs5TSVQ4AHYaYam9XTTeoFgxSchnklB5JOf4YsXzdZCWfSwQvg8HVn5IR5JGjY9qmRoNzpgAaOv9WQQE00kTVeTX+aRwd3VNE/pJA4fq6nLLZ6JiOu63hAl13W1FjmvbKxsNCuP5pCL01y2o0IkryBZvW5Kg4UrGPpkguOUbxne4zX1AhwiHPnoYhqm0xh+OY/SUUwnNKTDGbqcN1gwetxJLceYhkRKgxTUgHP/xymcvOgqpncgJ2k877FEVP9upsC45h0u084ADjA85s658E/WtoM/7F1aNMs+WNvOx7MzVVtMEp6tW681D7NQNYc4Cj7x03n9MPiKnE9TGtExnSboQxRwC2c6Q/QOfQUQiYSA9OajqF7Bcq85tGm2gzxgdvaUE/+4WQd0GqWgQby+XjTOP1q/A9cOdb5gQYLHg2A45Ydvs1lu1r6sTQ9jOEh6qqPKd4t3o1HLTQfBmudnwDfv31yu4lYiWT+Rm85PyaJQR/pKqRXhj0CK0hyl7B7QbI2tyv3vCtA4lgly3d0ez8w7eC6c0VxJdSTdudEspuIatuzou4Yzz5r6Mg8BZ44YzpRwwxxUcDDzfm4NQBLK7ITwzZvLNzcf3qML2A0xQXfwSUH3EohAvR8HET8zeB9voojeZ78GEbqB4QTJGHWCWewWMUsX+ONdkpKQYHTtjSgNGfAopu9/4fHkf6NvaTIJUsyOoBMFIp9TWC7RlYwPq40Y+W1frQWjoKkyhDy+kKvt7AlxVa+Li+La/kpkVT2VbpVquyrTVfW1dFtX29syZVV/T26iGplBF9RV/eb3g7W9FTRVfbAr09oOOMHmRq2trIGPB/cM3MehjQXgw5VVtyVDFpsaPoGrsJ5h6z0YTS20Y22VBeWqG5InUHFjsxVMRu9G1/qq0Vdd2XJ6u8Z5z5nqMg+B844R5/FNUgOyyqPKyF6HsGcmJEQ+QSFGxGOGkIdfMWA4AHxjHCISoZDCqRXTGYOFCcJwUgBd+PCXYeDB7/40xlFKEHRy8e6HN5dSyYZUB/iUdaNddTSsf64SdbNXodRASmcTcgbQNpo/wZNvN+7zHodTcqZqqin1NMdY2XNGs3HXEV+i4BPxUQqLkATco4bdQr9Yw3DRcnOu0zic7/AJjQGYymzK+D4v5Kjy5vr6e6V+DKyfjZnHJMwAAACUM2ZYlejdyt6XCNdv6bWvzSrKKrSavdnsbfkRZoPRwkADLyTFS1/5Is2POi6SMjy6kTKkqw2UocWB2YhN9jzFo2Q79lsaB59AbQJEdA4InxnAiwMhf8hl+hVY/YlIr8HnawxYewReGp9lGx1l2dnj02CLg6n6UNJUWetZtqbEmMj3mitr2jbH06ZHU1O2TQ6pdQcUTBk/mJryrD2U1h5ICQm3PY2anUQNT6E1cFmzerZjNYHLBWVbcFl1b3S1rzt905AtS8BlAZf3Gi5/BxA5jegwxncMJMN3FI1IHOMxO98wQ9EBt4N6wcNfIuRTDz4mMrqKgzGJGYzGCYPMBE0jhrHvSZIGw5w6oYOYoJRb/iiaIeoF9LCR9HbyqqrLncqsKiZdya0q/q3JriqGu5JfvP/GaLpiNPuksYHocyRTd/TOXq9aht1obPVjOF6NbS5bs4lim+9nucn+q5ySz6kEbuvFs7hIq7kRK7djbju6tYEXz/YKZ6Fl0iGTvK9/nhAmU0HohkLfrF6g/dM3HU3TFLfnuLpjmrpq6668lU1sG+HdmPeuNc/GjE9C/bQNw3XNRrc1OWVrtzUOCzwx1L6pyj1TqJ9C/dxr9fO3X/7lAicEJenUnyGjv3BQfYmuJziIfvvlX4XG2KbUqeLUqeSpGkS70qeK63EokIPdK5ClGTc7e9nWM93VG8aCwOEYCX2A8/xVY5OorOd/wkpk7UZsokkOKP1J8kZ4AmPZXzVy8UwbqJFqN2okiNB39yROMwdLkJdfog8AIGOeuCaIhtyX9S2O8JD98iEe4IUz6zcx/Z
iO0IDMKBBdEZC9k8xt7QcaTscE6X3E5fM1yGdQE09bMd3Ci8vQVdNophdklG16cRl9zYT5kB3HEXqB0Av2WS84R/DS0Oz2KY2pP/X4jVIfBWM8JBGaIcLtZSTyAsw8ukhY3DO1py6s2P9l0pKUZEiCX4zRKAwiwl+1q+yE92miZF8RP0ixcnX9mv2rG/LEvzt00bkDcXZJEEwRonEQwuHRbMEPSCjpGncP0PqWJeuq+3mEkmZoDY1VGWWbQkmHmejrIJR2bqx61lSXeQihJIQSF0p3sO14NhfEEDcqwW44r9CfpkmK0hFBXxGA2hGg7S8P23TVlg1B6zm24bqO3qUNYQ3T1m0Ia/ifjA2hmCQPDzIzAoMxyYhO+A8VM3W6cKh0xvTR6589noSKR6t6zFeZZRbi+j3ByTQmCY9UjfE9yXJ65Hr+QSGkk79PVh1V0Uxbs23HkdmbJmtWT1PdXleXypsNYNub5W7veXe35CTwd7rkupTosio5lmrrhmFomtXFOjfkehiLu5tLfNXWDKdRyGVB2UAvUrfyIc9yeal9y5Rd91gu8b2QvYCN4ehq3SjrZL44umQBUJAc1zGeqSKt6qljFanBBK1Sk649OpkmSEKvw4SFpsfNNaVylxiQRcwRCo0DUGCEHps1Vracn8p5rdNtVxN3quV+ReBXdlXk8TQpfVSouHCseWSMfyrulJIJ/M7CaP2pV2RJST9SFJFgOBrQeESpzwHp0wwcB6kOt+XJsVO8V8Wue8xXNZJj8K7YDgNWdblLHFjFoyMsWMX+GBa9FXOYZvZUzTBVVGy+dhZd4jVSni59bZdL3bL2HCqfOWopzWXp8w37GpMkwUOQhDiKaIrmY0WLFIiIPzdiE9+vmail7uonqf69zmbpNOxwuRku4eBRhi+VIPqIYz8veSMD3ZdwzH3x9HDgKbzYU0YkfnP5xdvvnZt35++vS9DuUANIdmCvm3vWEBZTORkJp5kt7idNVe81u5/MKFt2prdYFQfT2HmKS+E0sw963fHcT14EUx/7JEGzecz1lLn70T76239/iDBi4ieLzGa7IgEIAEfGICQt3lM29JkpboVoljiSy6MkSAmrPHeHp2Gq3AUh/Mb3jmopb3gyJ3KbP+fvdTV70o2daGLCwkePR/5UZ95kKvtS3k0hlVbMZp3XTE9vlGGkoGzXlVN1+oYqpJKQSnsvlUJ8z7OMxhhNSJTguOS7l3vyvWW+kKB8PfxnRPbGgROHA5xMs5crP5A9OJAVEEOOoxA4M7NnkEgkhVgaEhbMDQephyUfvh3nzySVH+l0HRleL615iFE2Xw+/sglDMGFovPwSCPG0Yk5r8sW6PVdtli82o2zXqVM1+pYmO6ouxJMQT/ssnrbxN98P+dR6gAE/j32JyaPjkUNX59fvrl+i9wTOvyy/2c08P9kVTuMAuofXs6iGdVBySNdY2nTDZIejbmufSU3StaZqEqdsObhAY5PR23kixmdNdZmHkENCDjGyN3mcGzvS+mjJ7fc8Qq/vaXjPHC+uRiSiY/gTtSeGDvYyVcQWiNgCEVvQ3FYsYgs+C0Cyes0Ku+SULduR1b7Vk3WhqAuAtN8A6TuMvCnLLs3uL0E3e/g1YabDPtPMs8OBKXHzL5g+F+JMZ095la08gzXh1V/G9D7gKa7h54QXQWaOGgJRPUVUkmNKruM4kqubkt0hqnJMxha4rmbaOqpaw/94UdWx456bx2fCMKQDHMosAVIyIewswQny4uxuImHWv0NMNXFQgZQtRtWZPcOFfzuOqqvjehhRdbtb3LaiZHXNsh1Ds4uABde1VbOLdd58AIex5DsMpKwvS14OpNS6CaTsudqRqDgikPJxYxFIub7rvVFJTyWQspweqGQwm4zoIAC19Rz9icFIMkM3I8B4wxH6Gn5Hl8TjPrwlz81Lck9COmG1IV6iK3jMqAjB/I56OEQXNPLgtD5w/bXN8MqdAYUqdt2DhaqRHEOkXcvhlc9QCKp4dKQUVLE/hkVvxX6V7TndRMXmE+GVq8MrayZKhFd2E16ZHw4ivHLpeVaHVy6hoBI+Ega66qXaTxuO6tq2azl6jopsS1WtTm04zQdwGDac3S15ezbZnuFopt3rJKNdQ66Hsbg7NNA1KktQUDYw0OnPM9CxHM0i05kw0FVMqTDQCQNd2wY6nwx5PZw+wrlPCHMTIWPuCTeZ3w0TYWdrHcxVsese0FWN5BhMLu3a2Z4D8qp4dAT0qtgfw6K3k8asZ/Z6tmmjYvMJO1tFGrPqiRJ2tk7sbMXhIOxsS8+zbDL7U/Ybu4W8niYpDiI8CMmOff4XzCs0VVtzdceyG9QqWlDu3pVEcyXVZQF/aq9v6X3DYGUvhaZaqalqzF3YlDRXe66muqqnw9BUL2KaJDG5e9ydUHKEkrOW3f4oOVX1tzcbwQEV4X4GEtl8jg4XYZTbjXlFZG0BZMcblEhetGqzRHInaKbTFA9G39RkTbP2+85wS1WzNhxpEcG7Wg3a8e3RbqK7t45Bek5k97HcVtm603MaprbKKNuMmNX6ltpXLdntme2kFNlua5d5iIhZETG7w/j+Q9YD2k8zspUUqur/1POMHArOb5pkRNkI6x9lvrWL86/eHBTCFl55TxVXq8fSkXxGr7yaARyG49bulrzVSGmr1ysdDJ145dVyPYzF3Z1XXs9t6pXHKLsIm9VcV9x1CK+81VMqvPKEV96utdZyvRL2xTSC8w2Bfhqw19MjvMzoiKDzKB3FdEI9EpE+wh4rVzqF4zwaopSMJzRmBt+FBnzYemyH91nbY70qdt3jvaqRHIP/VuvBsVtjwCoeHeHAKvbHsOjCaU847TV4u4/Waa84HITT3tLzHO01914k6jVNtZE6XlC2WYZU68P/hitbR+N6KNQ73vj4rh2vMauc4j3J0zuN0CSmcCKNcZ5xE8EL7DFI0KK34QYFSEGr9GgEKmPCT9+sCGn+MHAY56mEPSxNI6l4Eil7Emn+JJKmqr2es5FEosC5i/yqTgM5VG435p5c6iKOZtzctctY4LQ2XbtuRoTlv5ovmpByKyetRspZltmw2HZG2bKUc/qqKttl9xch5YSU2z8pV/arDxI0ANEx5glp0Bj/COpukCSgPt7BTx4/JhD2WEwAN10ymbEfIo+7ehAY5XTM3zZW+87HCpMVitpTSvZTCURdTD/CIxYiLxeBJy/pdLUTSfd/mEcOeg0IBV40D33N1kxIu5UTV6fT6VajxCcFZZvSjt2v9jVH7lmiiLeQdoci7VjgWC4JCjXusSTYNyWuiRhjepuquycvzUrMutTb0GmLssZbYaPLptZ8BHVLMy3lx6wEMbuydWTNllVTW8lo9y6Cm/HfRYROKbJvM+YnEaKjWY5hNqr4UFC2hatU90ZX+4bbNw3ZOpqKDwJX8cZHjateojErQpfiuIjLnxVlp+5YxQAZfWDV6wbwbiEWvxNzVBP4Ldaj78KzoSXPpp1KqCpunUupqoG0L6mqOB+Hc03SgnON7VpST9O7e+FqGe7q7ZrQOMWhzKaMv2UxyU5E5c319fdK/RiOt
2LiIz2U7cKf5Y024n46rrSpeO5Aibw6v353LaP3BNBkillp15tMYILovMJpHED3IOwvpiG7xA2Film9cvuX6IHVndVsR3ckgJmdZITnNV85S9PdbQb4hvVma3mfhCJpG6pj280M9Bllm7ke9L6h9zVWHV0okkKR3GtF8iKY+phld0M/YI/7K5FEZhbWO9gRrMj5NB7AQHkxY5JMGEVSKpZ+4LXj2qx9/jwZVMWifTlUxbl1WVTF+CAzPUxoKIUkkmGClDAMBqDvKB7faz65n+805cv7L1TT09w7rJFeT98I1neb9qEJpN8FNKfh3/4fTBx67QfZHJ02+t7GV0HXjYZQKKNsEwoZfd1kaa9UWxdQSEChfYZCb5ht3J96mQc6RiFG3hwd3ZfQ0flkGqUkQQmFY72wtaPZEzs8nGJfYRhhSCMscJLASV3ipAOxeHYElRhGkrwRnqR743ezA6x0seJ06rM7vsa62ykjq0Oza7pwXNimqVpb1SLe4oxcw7BVm+Ya3idi0zRVp5FzTEHZrtOxqffVHsu6IYC8APL7DOTfkwTErjfiUTQ378+/RxfwdZCmhCAJLbkkB2MQiynPGTShCeyekMAPsHF45qAJHAUECei+e3FU1X37IqmKc+tiqYrxQZo32QyRaUxBzQjzH+Af5T0ZXuIUKySd+gDZr28+XGYxYbbuao6pvLl6990tfPoH9uEfsw9vX38vT/y7jfB8TJg/yfEg+ddsBgmO0BXMZ4BZUZ2X6Co7hy4J0+XYR/w8u4bN4HG3BH5qXdARSZgfPKcOhLl0c5TV07RmVQJyynYDmS2T1bTWVBHaJVDWXqOsLF3HJ2Z1QPMUF4iwqmABO60C2BQswitlNlH4nGViTFISfcII53ZRYRitNow6kms6titZjtRZUkVmmHQ4W8v5TKUC1vA/XsPovrtr7gDlnEc4+om8RNcTwu5HiIAqKyetrqCRpjYtaMQp2y1opNt91ZR7Rk9AFQFV9hqqMG0RzauZJgBA/Di4J3GWZyV38+b6FOzPOEhpzO5vs6Se6awP2OUuZOVNQTQgmqWRHpN0RH0a0iEchtCW1z9NWA5FzKBOZoAKeOqyIfx+8I5yLYS66IZjS47VU7vCN/UMuwl1qR/D8eKbR0akHAMGP9JknOapXEEeyZPRRMk+VHL8o9wH5KNiGHbpKD3UmJcdgKg3EZ9q1hKOnXLqVp5HEESjh97iCA95aeblimwCbq2Y3roUd46rN0xxxynbtQxpet9wZMs0BNwScGuf4dYNgXc9AhWPe57cB/hHkqAZSlbmd20PF22Yzg5WiYQEZ9lbg5Qkik/uMGBD5S4I4Te+XVRLWTzd73WVRPDXD/wJ4YcZ/Cmy2P7+wvj9Vwb7+mYas1/OL/nz3r7L+Zz6nUc+DyC3RpRyCVau4PItTSYBwCg4S09bbjV+3zdSA9ouCWe5PU3VXGcbZL8xqm/G9ZRKwtmGDnKsYQwAp2w3BsDgJeFsa9elrwV0EdBlt9BlxC6qguieqVI0YlLJi/EA3QWwsVmWXmY7Cimz+IxIDAJqSBZ2pRSOJm4kYnaf70FYjwi8hugrdksWHrb9p6VsO6ahm+zWB7az0QPk4XSZZqch852VkOrSMlMxoC1Xv93SYc/BClU8OsILVewPzqdr88pKxdztorLSkYZnMITDhMa3haTg/qg7NcYteFdiQV70t1E8aEHZfnlgQ+7px1KpQZQHftxY2XiCVkFDUR5YlAcWyH7NswlkL5C9QPYC2T8d9P4ge4Hol0Yhrikq/vsM1xS22jMNV+v4mqKW62FcU+xucVuoMqE5hqUrJA6xl8iaamhbOdBvgTsaMj6MJd7NTRS3KTiNrQ9OF9YHU1atY/FZFtaHx42F9WF918L60LX1oajVAWdNiFhEXEqQT+5JSCdzP02fTAhOcJQGeW2rIELfMcCJzscAONv0lTpgS8NupH0Vm+4kftUIjqFSRrtmhueg+CoeHSH5KvbHsOitxMZqTs81e5aKAGSZMP8tLbpE4hhU+ydLX9vlUresPUfBZ46qluyPi8837GtMkgQPQcjhKKIpmo81D1MCeBkj/tyITXy/ZqKWuqufpPr3OpulAzKedmpHKw6HXdjRjjacZZ75hBVjIR/ZtcwS5ImyLCc4DgYDRnadAsITWU42VcNtvWc7DUOHM8p2Q4dhPjRDNhz1SNRwodbxxkfoEDrP7nzDhR37RUaXrFZ0ApBgRiKfIhKiYIxBs4RjirJ8rCFGPqsOjW547tUQeTjhX9w8/DUinw5bj2un7J1jsbPB0POH70iDK7juHMBvUPuudgynFhBMEibgaQKLlO08BrVkHCtXl18rP+g95XvVVO71XmRi1dw4hup4YVQ+bayAHYl5LoLgPou/y08wgZhWTmNNCI1t9KxmITQZZbvRv6yomCY7lsgLJxDTISAmXnu6j7wpzhFSUScDTvYYs4MJQBIeAn7iBThDOLt45jgmnmmSl9AgIm3KCsOnaeuSbVj/0BVKqmfYUdqU2jGcDEpiEwWiimBe2gEPvIil0fMnLMMQN0oB5OgpAJF0SykXgZeyfSgRkPzZxxLfh/C3FGIp24eSTyTYh1JpH0rFPlQEzmKny1s29+gDn/zTRlSND9rNfFxaLQvsOKYO8t2Q3C4cgniZH87SMFYzbLkscC3vEymhobqW2bAsMKdsuSywy0to6CIOXoD4vQbxr8PCbpAbMMdTBuNjlidRRn/77/fTIMKIsmJTCQ0DP0/rw02fCWLWUTp++BV+9PB4QhIghk8RbKCHXwFq4OTLQ8f17dXDe56QqmLRvqCq4ty6sKpifKihGhOc0IRGYRARPkesHB4NiZfXweNfS6z4L1Y005Ki6ZjEFETAMrhnv/FNK/FNC3/DJpSoVNqxDP0C4scZyic+9kkiLTYsk3cB360bYf9uyxL3usH9V+fX765f8ktqls2e3eTML4aucBoH0D0c3xdFKtnT1g22KlpsmQ2rcGSULScs6vUNQ3Z0UbRYALW9BmpLRYvl0qlUHOkZZnuJuA0WNmmMH/6LFvfRIfKpl9IY/Yn8NAtDaPY2ltG3M58IfCbwWZf47EDsr8cB0TrOt9MRRutAZz1lUNemwbe9CF/DVF3bUbeqLb99XEAt18MI/9zd4rYR4Qv/KXiMI1nTXavXxeI25HoYi7sb6zqL2G3mVFxQdhHba5jHYl0Xsb2PGysbT9AqHU7E9orY3h2q4O/SEZyhJEnQeZSOYjrhRaEAvKEbyiKE0JsBQ//zgJZlqlLi+8PWudsK732+qK/i0ZG4r2J/DDGe7Qb2Pge8V/HoCMBXsT+GRW8nsNcyHc00DMSgRU93W1r0IwjsrZ4oEdjbSWBvcTgcZmBvR9a4lXAnSU/bhrZVGIplNg1D4ZQth6E4fVOT3aPJnyV0Nt74+C5G392DXpjnb56hsi+8jEoF6oYhHTD/dqDxaJTCZqVJlga6WezJEmMGKkK+F3gLBDMxnIIszncziV4sDatUm3Myykb2DR8OwBMYYcIJspTU+dhKQ3rEa5faYsPCebngxGSS3RWyisfB
PU0Un3rTMQ8fUPKierdBdAfggtzSxePfzm6XVmVv6+F1JDBFPbz2RKitag1FKKdsV4QaBs994R5LAQwhQnnj4xOh32EURHg65PVM6f/8c35Xjn775d8ucIK5/9A1DX/75d/Zhfs3jAqDpGL37z+S8STkGS98knARFnwqOmGn/v/8834VfyWhH7DkHTLz5MBwzk6jGVboJIiYkqqUpiGSPHh2KaGhNORPLC0/YXSrGfotvHmmq5nyKB2HG0m1iHxMJnhC4g41QruBgCu3gz+wlbRFworsg7XtfDw70xYOROzXtoQpCS+L9TwocanrN5rTh/8NTbbLJZI6FZeg3jUUl5yyXXFpmqzsuWPu2hX3WVNd5iHEpRCXjOxDhNGQRCSrcc5kX6HfPPzfQeC1GfNUJe0WooOyS8pjkyVmJ7IEYJAfxMRL8YmHdTR+Hze6hGrNSUw3dVNTYg8HMnsxZQ20Ll3WDasjd7HN+O8iFhwmkUuIjZmfRjC47Viu3gjY5JQtAhvN6Jtq39ABeogYIwFs9hrY5FGNzEY+wTH26Y/wM3zy8CtX4g88R1NLfks7lT5V3DqXQFUDaV8KVXE+DoeaNjKqaoYh2Zbb3QtXy7CjZKq1YzisMLWKAe3TOwab2pDULlPR1TLsKBVd7RgO6x17nt+U/7O80WHfODYx5sUA9tZcoS6M2BuYK3S1E3NFkaPi4s3lu6+Q/3fndyRO0JuIry+7oMDhYdnE9+EKWXdMtZkXVk7ZbvkEw2JWa9cUyYCF6rjXquM5S0/BfT8ffsUv5zmAZ+xaGMcxDUMerO1NCaiSD3+J+uVMwdDTNApSXlSB3TYP4LU7bF2zFRCm6rBzLau72Jh6ht2AsPoxnAwI4xOVQQUm2hMZzzdcgGXAD8o/TbEPnSZK7k+gMGSlaKaxESrr3k29CTLbAVr6+3x6uGvL372J4BhKpym6YD4gD//B4FNpQk/8/mertF623TStF6dsN60X8yUApcRxBW4SuGmfcdNFnoKGV3SBnzzsUxCACI537oI3iemYFvlrFogJSILMsz3Mc+BckjDNPPWuMOzjh18HGP3hqxgnQfjHQ8dSref4AoTRFaYq5djS2jWgrs3xVcH/ZDBVRY6vqwxj+DTP70Wz/F5X16/Zv7eafXtJ7gLQV4J7ekt4/tONYxo6TszVEcYqsnK9L51OM/QNK0ABctHL83C9Z3m6BMBaOZ81AKunmk4zgJVRtuys6fZVV9Z6mgBYAmDtM8B6iycZLCoOpyzy7hRcNK0Gp365XX7nsbC0bHDnUSo73uadxzmA3CBGb/EQfwJ5jc4vX393/v3Nu2tk2Orf/iqkyso5rJEqhqM2DAHIKNuUKmpfA7XdlXVXqO1Cquy1VPkO8yKGWShYkVyWooc/p1Hg0azoYQLK+8N/etxzruxFx6XR51HXNwyWg2WigyLvccn6DL8lJA7IrU9uSx8r0AcdxvguwMnSzyS6zfTglN7eU48ktzjPoHw7u4X9nf+yzxrUlrLUbiJLdyAXX0cohZeNT3Ef8Tl+iYp5BR2sPMlCRK6YzhqPAE3VmlUWyylbtGxrbh/+BxFp6o4QkUJE7rOIvCBxSj7lqdW94peSEbufi1BWu2IGItObMkMRbU/6HezFP3dENGHP5w/fjaV6zvVzu2BWjuFkLNVPAVkDNBbRFABWsOQPqETarabZt8V+vCW3i80JHYRFqp7b+d4EiDbfm8q27f5QZEj6I0DJUHq977nztoR7Vkdw73u2tGh5aTM/UAHvVk5frQWkYaxgTtmuBUTX+6Yl26qwgAh4t9fw7vLhzx72MQL5jaYRRsSfFhd8LHaQuSSUXD8Tlh8uCgYh6WdAg2WJy6UXGgAVrz2fuzKwjHJxtOwWWhTvSUCisB88GiEmf0r3ing8YPCF1QB6HS4Kn0Evk5D8SIsRssOSZTZ6i4PJw19/++XfXzK21zhKAzzkjC5GQXjw/qft+EzYak/VbUs39a5Q6HqmrftLrOF/Mig0n6MwGMQ0kWHTj+XxzwrLWBlE0yArWRZPJzyEWJpJZEziYZZ9U1qaRYn3sHnKrD00+e0Ay12U5+8lKiaQqcSLCZTRd1x5TgJA0SEnRdN4gHnpsgidjx/+zJIwAxX0hU8bBDY+iTc6iNutX2Y6LNG4oWnaNufqxpp9M66nVuKq1+gmtKBsv8SVIff0Y0m/LUpcPW6sbDxBq9QCUeJKlLjaoVb3dUzHmYMmSibYIyilKEvjnWYf9NE56GjpiF/uTkYzlr7aJ/ckpBOWCRwFEUpHpHSp/T64J3F+3c0uuD8F4R9fXhLQ5CZMGcuqtiI8/31+UU77iCXaI2l2j/zwa3ZdvhxPWLpJPzq/9zZLLj0Pb1Tx6AhzVLE/jgwxLSjuLIaShVAizYbJ193Vtymi5FK/ZqJEyaXtLQaNSy4tDofDLLm0QI+fLZYVPwpl7cAWsPQd6yJ/O5Y+X/y4YPAqfw/62T9nv/v/DSZhtnENAgA= \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/record_0000-0001-5004-5918.xml b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/record_0000-0001-5004-5918.xml new file mode 100644 index 000000000..9534686ae --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/record_0000-0001-5004-5918.xml @@ -0,0 +1,1202 @@ + + + + https://orcid.org/0000-0001-5004-5918 + 0000-0001-5004-5918 + orcid.org + + + en + + + Direct + 2016-11-06T20:12:32.296Z + 2020-04-23T07:30:59.917Z + true + false + false + + + 2017-01-04T07:46:27.991Z + + 2016-11-06T20:12:32.525Z + 2016-11-06T20:12:32.525Z + Aurélie + Prémaud + + + + + + + + 2017-01-04T07:46:27.991Z + + 2017-01-04T07:46:27.991Z + 2017-01-04T07:46:27.991Z + + + https://orcid.org/client/0000-0003-1377-5676 + 0000-0003-1377-5676 + orcid.org + + ResearcherID + + https://orcid.org/0000-0001-5004-5918 + 0000-0001-5004-5918 + orcid.org + + Aurélie Prémaud + + ResearcherID + 
A-2095-2017 + http://www.researcherid.com/rid/A-2095-2017 + self + + + + + 2019-04-08T23:37:26.263Z + + + + + + + + + + + + 2019-04-08T23:37:26.263Z + + 2019-04-08T23:37:26.263Z + + + doi + 10.1155/2019/7245142 + 10.1155/2019/7245142 + https://doi.org/10.1155/2019/7245142 + self + + + + 2019-04-08T23:37:26.263Z + 2019-04-08T23:37:26.263Z + + + https://orcid.org/client/0000-0001-9884-1913 + 0000-0001-9884-1913 + orcid.org + + Crossref + + + A Prognostic Tool for Individualized Prediction of Graft Failure Risk within Ten Years after Kidney Transplantation + + + + doi + 10.1155/2019/7245142 + 10.1155/2019/7245142 + https://doi.org/10.1155/2019/7245142 + self + + + https://doi.org/10.1155/2019/7245142 + journal-article + + 2019 + 04 + 08 + + Journal of Transplantation + + + + 2018-10-03T15:11:13.783Z + + + doi + 10.1371/journal.pone.0180236 + 10.1371/journal.pone.0180236 + https://doi.org/10.1371/journal.pone.0180236 + self + + + + 2020-11-30T01:02:03.444Z + 2020-11-30T01:02:03.444Z + + + https://orcid.org/client/0000-0001-9884-1913 + 0000-0001-9884-1913 + orcid.org + + Crossref + + + An adjustable predictive score of graft survival in kidney transplant patients and the levels of risk linked to de novo donor-specific anti-HLA antibodies + + + + doi + 10.1371/journal.pone.0180236 + 10.1371/journal.pone.0180236 + https://doi.org/10.1371/journal.pone.0180236 + self + + + https://doi.org/10.1371/journal.pone.0180236 + journal-article + + 2017 + 07 + 03 + + PLOS ONE + + + + 2018-08-23T12:01:11.624Z + + + doi + 10.1038/clpt.2014.140 + 10.1038/clpt.2014.140 + self + + + wosuid + WOS:000342675400030 + wos:000342675400030 + self + + + + 2018-08-23T12:01:11.624Z + 2018-08-23T12:01:11.624Z + + + https://orcid.org/client/0000-0003-1377-5676 + 0000-0003-1377-5676 + orcid.org + + ResearcherID + + https://orcid.org/0000-0001-5004-5918 + 0000-0001-5004-5918 + orcid.org + + Aurélie Prémaud + + + Exposure to mycophenolic acid better predicts immunosuppressive efficacy than exposure to calcineurin inhibitors in renal transplant patients + + + + doi + 10.1038/clpt.2014.140 + 10.1038/clpt.2014.140 + self + + + wosuid + WOS:000342675400030 + wos:000342675400030 + self + + + http://gateway.webofknowledge.com/gateway/Gateway.cgi?GWVersion=2&SrcAuth=ORCID&SrcApp=OrcidOrg&DestLinkType=FullRecord&DestApp=WOS_CPL&KeyUT=WOS:000342675400030&KeyUID=WOS:000342675400030 + journal-article + + 2014 + + Clinical Pharmacology and Therapeutics + + + + 2018-08-23T12:01:11.635Z + + + wosuid + WOS:000336395700020 + wos:000336395700020 + self + + + doi + 10.1007/s00280-014-2466-0 + 10.1007/s00280-014-2466-0 + self + + + + 2018-08-23T12:01:11.635Z + 2018-08-23T12:01:11.635Z + + + https://orcid.org/client/0000-0003-1377-5676 + 0000-0003-1377-5676 + orcid.org + + ResearcherID + + https://orcid.org/0000-0001-5004-5918 + 0000-0001-5004-5918 + orcid.org + + Aurélie Prémaud + + + Pharmacokinetics and exposure-effect relationships of capecitabine in elderly patients with breast or colorectal cancer + + + + doi + 10.1007/s00280-014-2466-0 + 10.1007/s00280-014-2466-0 + self + + + wosuid + WOS:000336395700020 + wos:000336395700020 + self + + + http://gateway.webofknowledge.com/gateway/Gateway.cgi?GWVersion=2&SrcAuth=ORCID&SrcApp=OrcidOrg&DestLinkType=FullRecord&DestApp=WOS_CPL&KeyUT=WOS:000336395700020&KeyUID=WOS:000336395700020 + journal-article + + 2014 + + Cancer Chemotherapy and Pharmacology + + + + 2018-08-23T12:01:11.639Z + + + doi + 10.1007/s40262-013-0037-x + 10.1007/s40262-013-0037-x + self + + + wosuid + WOS:000318524800005 + 
wos:000318524800005 + self + + + + 2018-08-23T12:01:11.639Z + 2018-08-23T12:01:11.639Z + + + https://orcid.org/client/0000-0003-1377-5676 + 0000-0003-1377-5676 + orcid.org + + ResearcherID + + https://orcid.org/0000-0001-5004-5918 + 0000-0001-5004-5918 + orcid.org + + Aurélie Prémaud + + + Ciclosporin population pharmacokinetics and Bayesian estimation in thoracic transplant recipients + + + + doi + 10.1007/s40262-013-0037-x + 10.1007/s40262-013-0037-x + self + + + wosuid + WOS:000318524800005 + wos:000318524800005 + self + + + http://gateway.webofknowledge.com/gateway/Gateway.cgi?GWVersion=2&SrcAuth=ORCID&SrcApp=OrcidOrg&DestLinkType=FullRecord&DestApp=WOS_CPL&KeyUT=WOS:000318524800005&KeyUID=WOS:000318524800005 + journal-article + + 2013 + + Clinical Pharmacokinetics + + + + 2018-08-23T12:01:11.643Z + + + doi + 10.1016/j.phrs.2013.03.009 + 10.1016/j.phrs.2013.03.009 + self + + + wosuid + WOS:000319645300006 + wos:000319645300006 + self + + + + 2018-08-23T12:01:11.643Z + 2018-08-23T12:01:11.643Z + + + https://orcid.org/client/0000-0003-1377-5676 + 0000-0003-1377-5676 + orcid.org + + ResearcherID + + https://orcid.org/0000-0001-5004-5918 + 0000-0001-5004-5918 + orcid.org + + Aurélie Prémaud + + + Impact of longitudinal exposure to mycophenolic acid on acute rejection in renal-transplant recipients using a joint modeling approach + + + + doi + 10.1016/j.phrs.2013.03.009 + 10.1016/j.phrs.2013.03.009 + self + + + wosuid + WOS:000319645300006 + wos:000319645300006 + self + + + http://gateway.webofknowledge.com/gateway/Gateway.cgi?GWVersion=2&SrcAuth=ORCID&SrcApp=OrcidOrg&DestLinkType=FullRecord&DestApp=WOS_CPL&KeyUT=WOS:000319645300006&KeyUID=WOS:000319645300006 + journal-article + + 2013 + + Pharmacological Research + + + + 2018-08-23T12:01:11.646Z + + + doi + 10.2165/11594050-000000000-00000 + 10.2165/11594050-000000000-00000 + self + + + + 2018-08-23T12:01:11.646Z + 2018-08-23T12:01:11.646Z + + + https://orcid.org/client/0000-0003-1377-5676 + 0000-0003-1377-5676 + orcid.org + + ResearcherID + + https://orcid.org/0000-0001-5004-5918 + 0000-0001-5004-5918 + orcid.org + + Aurélie Prémaud + + + Bayesian estimation of mycophenolate mofetil in lung transplantation, using a population pharmacokinetic model developed in kidney and lung transplant recipients + + + + doi + 10.2165/11594050-000000000-00000 + 10.2165/11594050-000000000-00000 + self + + + http://www.ncbi.nlm.nih.gov/pubmed/22054177 + journal-article + + 2012 + + Clinical Pharmacokinetics + + + + 2018-08-23T12:01:11.650Z + + + doi + 10.1016/j.phrs.2011.01.005 + 10.1016/j.phrs.2011.01.005 + self + + + wosuid + WOS:000290892300011 + wos:000290892300011 + self + + + + 2018-08-23T12:01:11.650Z + 2018-08-23T12:01:11.650Z + + + https://orcid.org/client/0000-0003-1377-5676 + 0000-0003-1377-5676 + orcid.org + + ResearcherID + + https://orcid.org/0000-0001-5004-5918 + 0000-0001-5004-5918 + orcid.org + + Aurélie Prémaud + + + Inhibition of T-cell activation and proliferation by mycophenolic acid in patients awaiting liver transplantation: PK/PD relationships + + + + doi + 10.1016/j.phrs.2011.01.005 + 10.1016/j.phrs.2011.01.005 + self + + + wosuid + WOS:000290892300011 + wos:000290892300011 + self + + + http://gateway.webofknowledge.com/gateway/Gateway.cgi?GWVersion=2&SrcAuth=ORCID&SrcApp=OrcidOrg&DestLinkType=FullRecord&DestApp=WOS_CPL&KeyUT=WOS:000290892300011&KeyUID=WOS:000290892300011 + journal-article + + 2011 + + Pharmacological Research + + + + 2018-08-23T12:01:11.653Z + + + wosuid + WOS:000290557800004 + wos:000290557800004 + self + + + doi 
+ 10.1097/FTD.0b013e31821633a6 + 10.1097/ftd.0b013e31821633a6 + self + + + + 2018-08-23T12:01:11.653Z + 2018-08-23T12:01:11.653Z + + + https://orcid.org/client/0000-0003-1377-5676 + 0000-0003-1377-5676 + orcid.org + + ResearcherID + + https://orcid.org/0000-0001-5004-5918 + 0000-0001-5004-5918 + orcid.org + + Aurélie Prémaud + + + Large scale analysis of routine dose adjustments of mycophenolate mofetil based on global exposure in renal transplant patients + + + + doi + 10.1097/FTD.0b013e31821633a6 + 10.1097/ftd.0b013e31821633a6 + self + + + wosuid + WOS:000290557800004 + wos:000290557800004 + self + + + http://gateway.webofknowledge.com/gateway/Gateway.cgi?GWVersion=2&SrcAuth=ORCID&SrcApp=OrcidOrg&DestLinkType=FullRecord&DestApp=WOS_CPL&KeyUT=WOS:000290557800004&KeyUID=WOS:000290557800004 + journal-article + + 2011 + + Therapeutic Drug Monitoring + + + + 2018-08-23T12:01:11.656Z + + + wosuid + WOS:000288041400008 + wos:000288041400008 + self + + + doi + 10.1016/j.phrs.2010.10.017 + 10.1016/j.phrs.2010.10.017 + self + + + + 2018-08-23T12:01:11.656Z + 2018-08-23T12:01:11.656Z + + + https://orcid.org/client/0000-0003-1377-5676 + 0000-0003-1377-5676 + orcid.org + + ResearcherID + + https://orcid.org/0000-0001-5004-5918 + 0000-0001-5004-5918 + orcid.org + + Aurélie Prémaud + + + Population pharmacokinetics of mycophenolic acid in pediatric renal transplant patients using parametric and nonparametric approaches + + + + doi + 10.1016/j.phrs.2010.10.017 + 10.1016/j.phrs.2010.10.017 + self + + + wosuid + WOS:000288041400008 + wos:000288041400008 + self + + + http://gateway.webofknowledge.com/gateway/Gateway.cgi?GWVersion=2&SrcAuth=ORCID&SrcApp=OrcidOrg&DestLinkType=FullRecord&DestApp=WOS_CPL&KeyUT=WOS:000288041400008&KeyUID=WOS:000288041400008 + journal-article + + 2011 + + Pharmacological Research + + + + 2018-08-23T12:01:11.660Z + + + wosuid + WOS:000275009700011 + wos:000275009700011 + self + + + doi + 10.1016/j.phrs.2009.09.006 + 10.1016/j.phrs.2009.09.006 + self + + + + 2018-08-23T12:01:11.660Z + 2018-08-23T12:01:11.660Z + + + https://orcid.org/client/0000-0003-1377-5676 + 0000-0003-1377-5676 + orcid.org + + ResearcherID + + https://orcid.org/0000-0001-5004-5918 + 0000-0001-5004-5918 + orcid.org + + Aurélie Prémaud + + + Feasibility of, and critical paths for mycophenolate mofetil Bayesian dose adjustment: pharmacological re-appraisal of a concentration-controlled versus fixed-dose trial in renal transplant recipients + + + + doi + 10.1016/j.phrs.2009.09.006 + 10.1016/j.phrs.2009.09.006 + self + + + wosuid + WOS:000275009700011 + wos:000275009700011 + self + + + http://gateway.webofknowledge.com/gateway/Gateway.cgi?GWVersion=2&SrcAuth=ORCID&SrcApp=OrcidOrg&DestLinkType=FullRecord&DestApp=WOS_CPL&KeyUT=WOS:000275009700011&KeyUID=WOS:000275009700011 + journal-article + + 2010 + + Pharmacological Research + + + + 2018-08-23T12:01:11.664Z + + + doi + 10.2165/11535950-000000000-00000 + 10.2165/11535950-000000000-00000 + self + + + + 2018-08-23T12:01:11.664Z + 2018-08-23T12:01:11.664Z + + + https://orcid.org/client/0000-0003-1377-5676 + 0000-0003-1377-5676 + orcid.org + + ResearcherID + + https://orcid.org/0000-0001-5004-5918 + 0000-0001-5004-5918 + orcid.org + + Aurélie Prémaud + + + Population pharmacokinetics and Bayesian estimation of tacrolimus exposure in renal transplant recipients on a new once-daily formulation + + + + doi + 10.2165/11535950-000000000-00000 + 10.2165/11535950-000000000-00000 + self + + + http://www.ncbi.nlm.nih.gov/pubmed/20818834 + journal-article + + 2010 + + Clinical 
Pharmacokinetics + + + + 2018-08-23T12:01:11.671Z + + + doi + 10.1097/FTD.0b013e3181a8f0ae + 10.1097/ftd.0b013e3181a8f0ae + self + + + + 2018-08-23T12:01:11.671Z + 2018-08-23T12:01:11.671Z + + + https://orcid.org/client/0000-0003-1377-5676 + 0000-0003-1377-5676 + orcid.org + + ResearcherID + + https://orcid.org/0000-0001-5004-5918 + 0000-0001-5004-5918 + orcid.org + + Aurélie Prémaud + + + Performance of the new mycophenolate assay based on IMPDH enzymatic activity for pharmacokinetic investigations and setup of Bayesian estimators in different populations of allograft recipients + + + + doi + 10.1097/FTD.0b013e3181a8f0ae + 10.1097/ftd.0b013e3181a8f0ae + self + + + http://www.ncbi.nlm.nih.gov/pubmed/19571778 + journal-article + + 2009 + + Therapeutic Drug Monitoring + + + + 2018-08-23T12:01:11.675Z + + + doi + 10.2165/11318080-000000000-00000 + 10.2165/11318080-000000000-00000 + self + + + + 2018-08-23T12:01:11.675Z + 2018-08-23T12:01:11.675Z + + + https://orcid.org/client/0000-0003-1377-5676 + 0000-0003-1377-5676 + orcid.org + + ResearcherID + + https://orcid.org/0000-0001-5004-5918 + 0000-0001-5004-5918 + orcid.org + + Aurélie Prémaud + + + Tacrolimus population pharmacokinetic-pharmacogenetic analysis and Bayesian estimation in renal transplant recipients + + + + doi + 10.2165/11318080-000000000-00000 + 10.2165/11318080-000000000-00000 + self + + + http://www.ncbi.nlm.nih.gov/pubmed/19902988 + journal-article + + 2009 + + Clinical Pharmacokinetics + + + + 2018-08-23T12:01:11.678Z + + + doi + 10.1111/j.1365-2125.2006.02509.x + 10.1111/j.1365-2125.2006.02509.x + self + + + wosuid + WOS:000240556900012 + wos:000240556900012 + self + + + + 2018-08-23T12:01:11.678Z + 2018-08-23T12:01:11.678Z + + + https://orcid.org/client/0000-0003-1377-5676 + 0000-0003-1377-5676 + orcid.org + + ResearcherID + + https://orcid.org/0000-0001-5004-5918 + 0000-0001-5004-5918 + orcid.org + + Aurélie Prémaud + + + A comparison of the effect of ciclosporin and sirolimus on the pharmokinetics of mycophenolate in renal transplant patients + + + + doi + 10.1111/j.1365-2125.2006.02509.x + 10.1111/j.1365-2125.2006.02509.x + self + + + wosuid + WOS:000240556900012 + wos:000240556900012 + self + + + http://gateway.webofknowledge.com/gateway/Gateway.cgi?GWVersion=2&SrcAuth=ORCID&SrcApp=OrcidOrg&DestLinkType=FullRecord&DestApp=WOS_CPL&KeyUT=WOS:000240556900012&KeyUID=WOS:000240556900012 + journal-article + + 2006 + + British Journal of Clinical Pharmacology + + + + 2018-08-23T12:01:11.681Z + + + doi + 10.1097/01.ftd.0000197092.84935.ef + 10.1097/01.ftd.0000197092.84935.ef + self + + + + 2018-08-23T12:01:11.681Z + 2018-08-23T12:01:11.681Z + + + https://orcid.org/client/0000-0003-1377-5676 + 0000-0003-1377-5676 + orcid.org + + ResearcherID + + https://orcid.org/0000-0001-5004-5918 + 0000-0001-5004-5918 + orcid.org + + Aurélie Prémaud + + + Determination of mycophenolic acid plasma levels in renal transplant recipients co-administered sirolimus: comparison of an enzyme multiplied immunoassay technique (EMIT) and liquid chromatography-tandem mass spectrometry + + + + doi + 10.1097/01.ftd.0000197092.84935.ef + 10.1097/01.ftd.0000197092.84935.ef + self + + + http://www.ncbi.nlm.nih.gov/pubmed/16628144 + journal-article + + 2006 + + Therapeutic Drug Monitoring + + + + 2018-08-23T12:01:11.683Z + + + doi + 10.2165/00003088-200544080-00005 + 10.2165/00003088-200544080-00005 + self + + + + 2018-08-23T12:01:11.683Z + 2018-08-23T12:01:11.683Z + + + https://orcid.org/client/0000-0003-1377-5676 + 0000-0003-1377-5676 + orcid.org + + 
ResearcherID + + https://orcid.org/0000-0001-5004-5918 + 0000-0001-5004-5918 + orcid.org + + Aurélie Prémaud + + + A double absorption-phase model adequately describes mycophenolic acid plasma profiles in de novo renal transplant recipients given oral mycophenolate mofetil + + + + doi + 10.2165/00003088-200544080-00005 + 10.2165/00003088-200544080-00005 + self + + + http://www.ncbi.nlm.nih.gov/pubmed/16029068 + journal-article + + 2005 + + Clinical Pharmacokinetics + + + + 2018-08-23T12:01:11.686Z + + + wosuid + WOS:000225839800019 + wos:000225839800019 + self + + + doi + 10.1124/dmd.104.001651 + 10.1124/dmd.104.001651 + self + + + + 2018-08-23T12:01:11.686Z + 2018-08-23T12:01:11.686Z + + + https://orcid.org/client/0000-0003-1377-5676 + 0000-0003-1377-5676 + orcid.org + + ResearcherID + + https://orcid.org/0000-0001-5004-5918 + 0000-0001-5004-5918 + orcid.org + + Aurélie Prémaud + + + Identification of the UDP-glucuronosyltransferase isoforms involved in mycophenolic acid phase II metabolism + + + + doi + 10.1124/dmd.104.001651 + 10.1124/dmd.104.001651 + self + + + wosuid + WOS:000225839800019 + wos:000225839800019 + self + + + http://gateway.webofknowledge.com/gateway/Gateway.cgi?GWVersion=2&SrcAuth=ORCID&SrcApp=OrcidOrg&DestLinkType=FullRecord&DestApp=WOS_CPL&KeyUT=WOS:000225839800019&KeyUID=WOS:000225839800019 + journal-article + + 2005 + + Drug Metabolism and Disposition: The Biological Fate of Chemicals + + + + 2018-08-23T12:01:11.689Z + + + source-work-id + 0823180801209-17 + 0823180801209-17 + self + + + + 2018-08-23T12:01:11.689Z + 2018-08-23T12:01:11.689Z + + + https://orcid.org/client/0000-0003-1377-5676 + 0000-0003-1377-5676 + orcid.org + + ResearcherID + + https://orcid.org/0000-0001-5004-5918 + 0000-0001-5004-5918 + orcid.org + + Aurélie Prémaud + + + Maximum a posteriori bayesian estimation of mycophenolic acid pharmacokinetics in renal transplant recipients at different postgrafting periods + + + + source-work-id + 0823180801209-17 + 0823180801209-17 + self + + + http://www.ncbi.nlm.nih.gov/pubmed/15905807 + journal-article + + 2005 + + Therapeutic Drug Monitoring + + + + 2018-08-23T12:01:11.692Z + + + source-work-id + 0823180801209-19 + 0823180801209-19 + self + + + + 2018-08-23T12:01:11.692Z + 2018-08-23T12:01:11.692Z + + + https://orcid.org/client/0000-0003-1377-5676 + 0000-0003-1377-5676 + orcid.org + + ResearcherID + + https://orcid.org/0000-0001-5004-5918 + 0000-0001-5004-5918 + orcid.org + + Aurélie Prémaud + + + Characterization of a phase 1 metabolite of mycophenolic acid produced by CYP3A4/5 + + + + source-work-id + 0823180801209-19 + 0823180801209-19 + self + + + http://www.ncbi.nlm.nih.gov/pubmed/15570183 + journal-article + + 2004 + + Therapeutic Drug Monitoring + + + + 2018-08-23T12:01:11.695Z + + + source-work-id + 0823180801209-18 + 0823180801209-18 + self + + + + 2018-08-23T12:01:11.695Z + 2018-08-23T12:01:11.695Z + + + https://orcid.org/client/0000-0003-1377-5676 + 0000-0003-1377-5676 + orcid.org + + ResearcherID + + https://orcid.org/0000-0001-5004-5918 + 0000-0001-5004-5918 + orcid.org + + Aurélie Prémaud + + + Comparison of liquid chromatography-tandem mass spectrometry with a commercial enzyme-multiplied immunoassay for the determination of plasma MPA in renal transplant recipients and consequences for therapeutic drug monitoring + + + + source-work-id + 0823180801209-18 + 0823180801209-18 + self + + + http://www.ncbi.nlm.nih.gov/pubmed/15570184 + journal-article + + 2004 + + Therapeutic Drug Monitoring + + + + 2018-08-23T12:01:11.697Z + + + 
source-work-id + 0823180801209-21 + 0823180801209-21 + self + + + + 2018-08-23T12:01:11.697Z + 2018-08-23T12:01:11.697Z + + + https://orcid.org/client/0000-0003-1377-5676 + 0000-0003-1377-5676 + orcid.org + + ResearcherID + + https://orcid.org/0000-0001-5004-5918 + 0000-0001-5004-5918 + orcid.org + + Aurélie Prémaud + + + An animal model for the study of chronopharmacokinetics of drugs and application to methotrexate and vinorelbine + + + + source-work-id + 0823180801209-21 + 0823180801209-21 + self + + + http://www.ncbi.nlm.nih.gov/pubmed/12383710 + journal-article + + 2002 + + Toxicology and Applied Pharmacology + + + + + diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/record_8888-8888-8888-8880.xml b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/record_8888-8888-8888-8880.xml index 7abc2f35a..5cf9528c5 100644 --- a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/record_8888-8888-8888-8880.xml +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/record_8888-8888-8888-8880.xml @@ -732,7 +732,7 @@ part-of - + 2001-12-31T12:00:00 2001-12-31T12:00:00
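
Note for reviewers (not part of the diff): the long "H4sIA..."-prefixed payloads earlier in this patch are gzip streams encoded as base64 — "H4sIA" is the base64 form of the gzip magic bytes 1f 8b 08. When one of these fixtures needs to be inspected, it can be decoded with plain JDK classes. A minimal, Java-8-compatible sketch; the class and method names below are illustrative only and do not exist in this patch:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.zip.GZIPInputStream;

public class DecodeCompressedFixture {

	// Turns a *.compressed.base64 fixture back into its original record text:
	// base64-decode first, then gunzip the resulting byte stream.
	public static String decode(String base64CompressedRecord) throws IOException {
		byte[] gzipped = Base64.getDecoder().decode(base64CompressedRecord.trim());
		try (InputStream in = new GZIPInputStream(new ByteArrayInputStream(gzipped));
			ByteArrayOutputStream out = new ByteArrayOutputStream()) {
			byte[] buffer = new byte[8192];
			int n;
			while ((n = in.read(buffer)) != -1) {
				out.write(buffer, 0, n);
			}
			return new String(out.toByteArray(), StandardCharsets.UTF_8);
		}
	}
}

Decoding is mainly useful when a test against one of these fixtures starts failing and you need to see the actual record content rather than the opaque payload.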
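
The new test resource record_0000-0001-5004-5918.xml added above is a full ORCID record (identifier block plus a list of work groups). A quick, namespace-agnostic way to sanity-check such a fixture is an XPath pass using local-name(), which sidesteps the record's namespace prefixes. This is a sketch assuming the public ORCID record schema (an orcid-identifier element with a path child, and one work-summary element per work); it is an illustration, not code from this patch:

import java.io.InputStream;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;

public class RecordFixtureSmokeTest {

	public static void main(String[] args) throws Exception {
		DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
		dbf.setNamespaceAware(true);
		try (InputStream in = RecordFixtureSmokeTest.class
			.getResourceAsStream("/eu/dnetlib/doiboost/orcid/xml/record_0000-0001-5004-5918.xml")) {
			Document doc = dbf.newDocumentBuilder().parse(in);
			XPath xpath = XPathFactory.newInstance().newXPath();
			// local-name() matches elements regardless of their namespace prefix
			String orcid = xpath
				.evaluate("//*[local-name()='orcid-identifier']/*[local-name()='path']", doc);
			Double works = (Double) xpath
				.evaluate("count(//*[local-name()='work-summary'])", doc, XPathConstants.NUMBER);
			System.out.println(orcid + " has " + works.intValue() + " work summaries");
		}
	}
}

For the fixture above this should report ORCID 0000-0001-5004-5918 and one summary per work group, which makes it easy to spot a truncated or malformed resource before debugging the parser itself.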