From 0fb6af5586ac0532745bd9fde1347b1c972fcf8a Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 29 Jan 2024 18:12:33 +0100 Subject: [PATCH 1/2] Updated main pom dependency against dhp-schema, from 8.0.1 to 9.0.0. The new fields included in the updated schema module are populated by the Solr JSON payload mapping, which also limits the number of authors serialised to 200. --- .../dhp/schema/oaf/utils/ModelHardLimits.java | 1 + .../model/ProvisionModelSupport.java | 19 ++++++++++++++++++- pom.xml | 2 +- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ModelHardLimits.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ModelHardLimits.java index 36d138ba1..74cd1b42a 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ModelHardLimits.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ModelHardLimits.java @@ -12,6 +12,7 @@ public class ModelHardLimits { public static final int MAX_EXTERNAL_ENTITIES = 50; public static final int MAX_AUTHORS = 200; + public static final int MAX_RELATED_AUTHORS = 20; public static final int MAX_AUTHOR_FULLNAME_LENGTH = 1000; public static final int MAX_TITLE_LENGTH = 5000; public static final int MAX_TITLES = 10; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java index 4a2326453..bc02b595f 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java @@ -5,6 +5,7 @@ import java.io.StringReader; import java.util.*; import java.util.stream.Collectors; +import eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; import org.dom4j.DocumentException; @@ -150,6 +151,12 @@ public class ProvisionModelSupport { rr.setPublisher(re.getPublisher()); rr.setResulttype(mapQualifier(re.getResulttype())); rr.setTitle(Optional.ofNullable(re.getTitle()).map(StructuredProperty::getValue).orElse(null)); + rr.setDescription(StringUtils.left(re.getDescription(), ModelHardLimits.MAX_RELATED_ABSTRACT_LENGTH)); + rr.setAuthor(Optional.ofNullable(re.getAuthor()) + .map(aa -> aa.stream() + .limit(ModelHardLimits.MAX_RELATED_AUTHORS) + .collect(Collectors.toList())) + .orElse(null)); if (relation.getValidated() == null) { relation.setValidated(false); @@ -378,6 +385,7 @@ public class ProvisionModelSupport { rs.setPubliclyFunded(r.getPubliclyFunded()); rs.setTransformativeAgreement(r.getTransformativeAgreement()); rs.setExternalReference(mapExternalReference(r.getExternalReference())); + rs.setBestinstancetype(mapQualifier(r.getBestInstancetype())); rs.setInstance(mapInstances(r.getInstance())); if (r instanceof Publication) { @@ -667,14 +675,23 @@ public class ProvisionModelSupport { } private static List asAuthor(List authorList) { + return asAuthor(authorList, ModelHardLimits.MAX_AUTHORS); + } + + private static List asAuthor(List authorList, int maxAuthors) { return Optional .ofNullable(authorList) .map( authors -> authors .stream() + .limit(maxAuthors) .map( a -> Author - .newInstance(a.getFullname(), a.getName(), a.getSurname(), a.getRank(), asPid(a.getPid()))) + .newInstance( + StringUtils.left(a.getFullname(), ModelHardLimits.MAX_AUTHOR_FULLNAME_LENGTH), + a.getName(), + a.getSurname(), + a.getRank(), asPid(a.getPid()))) .collect(Collectors.toList())) .orElse(null); } diff --git a/pom.xml b/pom.xml index e1d99f25b..9480ddfc0 100644 --- a/pom.xml +++ b/pom.xml @@ -937,7 +937,7 @@ 1.1.3 1.7 1.0.7 - [8.0.1] + [9.0.0] cdh5.9.2 3.5 11.0.2 From 32fa579b809138f9ae5d52d661423b1cebf4fbe4 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 28 Oct 2024 10:03:02 +0100 Subject: [PATCH 2/2] [graph provision] select the longest abstract --- .../dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java index 63f3c2ead..add1c80fa 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.provision; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.util.Comparator; import java.util.List; import java.util.Objects; import java.util.Optional; @@ -167,8 +168,9 @@ public class CreateRelatedEntitiesJob_phase1 { result .getDescription() .stream() - .findFirst() + .filter(d -> Objects.nonNull(d.getValue())) .map(Field::getValue) + .max(Comparator.comparingInt(String::length)) .ifPresent( d -> re.setDescription(StringUtils.left(d, ModelHardLimits.MAX_RELATED_ABSTRACT_LENGTH))); }