From 83504ecacebcab44212a0679350dfa35a839c484 Mon Sep 17 00:00:00 2001
From: Claudio Atzori
Date: Thu, 28 May 2020 13:52:30 +0200
Subject: [PATCH] limiting the maximum number of authors allowed in XML records to MAX_AUTHORS = 200; authors with ORCID can exceed that limit

---
Review notes (text between the "---" marker and the diff is not part of the
commit message):

* What the patch does: for a Result whose author list is longer than
  MAX_AUTHORS (200), the list is rebuilt in original order; an author is kept
  while fewer than MAX_AUTHORS have been kept so far, or whenever hasORCID(a)
  is true, so the final list may exceed MAX_AUTHORS.
* hasORCID(Author) returns true when any non-null pid of the author has a
  non-null qualifier whose non-blank classid equals "orcid" ignoring case.
* Changed in review: the classid comparison now uses String.equalsIgnoreCase
  instead of lower-casing with the JVM default locale. String.toLowerCase()
  is locale-sensitive (e.g. under a Turkish locale "ORCID" lower-cases with a
  dotless i and would no longer equal "orcid").
* Changed in review: the local list is declared as List<Author> instead of
  the raw type List.
* Both changes replace one added line with one added line, so the hunk line
  counts and the diffstat below are unchanged. The post-image blob id on the
  "index" line still refers to the content before these two changes.
* NOTE(review): the leading whitespace inside the hunks was reconstructed
  (tab indentation assumed) -- confirm against the target file, or apply
  with a whitespace-tolerant option.
* NOTE(review): java.util.function.Predicate is imported by this patch but is
  not referenced by any line added here -- confirm whether it is needed.

 .../CreateRelatedEntitiesJob_phase2.java | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java
index 7655d0da6..cc9f17ee7 100644
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java
@@ -5,7 +5,9 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
 
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Objects;
 import java.util.Optional;
+import java.util.function.Predicate;
 import java.util.stream.Collectors;
 
 import org.apache.commons.io.IOUtils;
@@ -22,6 +24,7 @@ import org.slf4j.LoggerFactory;
 
 import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.collect.Lists;
 
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
@@ -63,6 +66,8 @@ public class CreateRelatedEntitiesJob_phase2 {
 
 	private static final int MAX_EXTERNAL_ENTITIES = 50;
 
+	private static final int MAX_AUTHORS = 200;
+
 	public static void main(String[] args) throws Exception {
 
 		String jsonConfiguration = IOUtils
@@ -205,6 +210,16 @@ public class CreateRelatedEntitiesJob_phase2 {
 							.collect(Collectors.toList());
 						r.setExternalReference(refs);
 					}
+					if (r.getAuthor() != null && r.getAuthor().size() > MAX_AUTHORS) {
+						List<Author> authors = Lists.newArrayList();
+						for (int i = 0; i < r.getAuthor().size(); i++) {
+							final Author a = r.getAuthor().get(i);
+							if (authors.size() < MAX_AUTHORS || hasORCID(a)) {
+								authors.add(a);
+							}
+						}
+						r.setAuthor(authors);
+					}
 				}
 				return e;
 			}, Encoders.bean(entityClazz))
@@ -214,6 +229,18 @@ public class CreateRelatedEntitiesJob_phase2 {
 				Encoders.bean(TypedRow.class));
 	}
 
+	private static boolean hasORCID(Author a) {
+		return a.getPid() != null && a
+			.getPid()
+			.stream()
+			.filter(Objects::nonNull)
+			.map(StructuredProperty::getQualifier)
+			.filter(Objects::nonNull)
+			.map(Qualifier::getClassid)
+			.filter(StringUtils::isNotBlank)
+			.anyMatch("orcid"::equalsIgnoreCase);
+	}
+
 	private static TypedRow getTypedRow(String type, OafEntity entity)
 		throws JsonProcessingException {
 		TypedRow t = new TypedRow();