dhp-schema upgrade & provision mapping #498

Merged
claudio.atzori merged 2 commits from beta_provision_alignment_9.0.0 into beta 2024-10-28 10:03:24 +01:00
4 changed files with 23 additions and 3 deletions

View File

@ -12,6 +12,7 @@ public class ModelHardLimits {
  public static final int MAX_EXTERNAL_ENTITIES = 50;
  public static final int MAX_AUTHORS = 200;
+ public static final int MAX_RELATED_AUTHORS = 20;
  public static final int MAX_AUTHOR_FULLNAME_LENGTH = 1000;
  public static final int MAX_TITLE_LENGTH = 5000;
  public static final int MAX_TITLES = 10;

View File

@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.provision;
  import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+ import java.util.Comparator;
  import java.util.List;
  import java.util.Objects;
  import java.util.Optional;
@ -167,8 +168,9 @@ public class CreateRelatedEntitiesJob_phase1 {
  result
  .getDescription()
  .stream()
- .findFirst()
+ .filter(d -> Objects.nonNull(d.getValue()))
  .map(Field::getValue)
+ .max(Comparator.comparingInt(String::length))
  .ifPresent(
  d -> re.setDescription(StringUtils.left(d, ModelHardLimits.MAX_RELATED_ABSTRACT_LENGTH)));
  }

View File

@ -5,6 +5,7 @@ import java.io.StringReader;
  import java.util.*;
  import java.util.stream.Collectors;
+ import eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits;
  import org.apache.commons.lang3.StringUtils;
  import org.dom4j.Document;
  import org.dom4j.DocumentException;
@ -150,6 +151,12 @@ public class ProvisionModelSupport {
  rr.setPublisher(re.getPublisher());
  rr.setResulttype(mapQualifier(re.getResulttype()));
  rr.setTitle(Optional.ofNullable(re.getTitle()).map(StructuredProperty::getValue).orElse(null));
+ rr.setDescription(StringUtils.left(re.getDescription(), ModelHardLimits.MAX_RELATED_ABSTRACT_LENGTH));
+ rr.setAuthor(Optional.ofNullable(re.getAuthor())
+ .map(aa -> aa.stream()
+ .limit(ModelHardLimits.MAX_RELATED_AUTHORS)
+ .collect(Collectors.toList()))
+ .orElse(null));
  if (relation.getValidated() == null) {
  relation.setValidated(false);
@ -378,6 +385,7 @@ public class ProvisionModelSupport {
  rs.setPubliclyFunded(r.getPubliclyFunded());
  rs.setTransformativeAgreement(r.getTransformativeAgreement());
  rs.setExternalReference(mapExternalReference(r.getExternalReference()));
+ rs.setBestinstancetype(mapQualifier(r.getBestInstancetype()));
  rs.setInstance(mapInstances(r.getInstance()));
  if (r instanceof Publication) {
@ -667,14 +675,23 @@ public class ProvisionModelSupport {
  }
  private static List<Author> asAuthor(List<eu.dnetlib.dhp.schema.oaf.Author> authorList) {
+ return asAuthor(authorList, ModelHardLimits.MAX_AUTHORS);
+ }
+ private static List<Author> asAuthor(List<eu.dnetlib.dhp.schema.oaf.Author> authorList, int maxAuthors) {
  return Optional
  .ofNullable(authorList)
  .map(
  authors -> authors
  .stream()
+ .limit(maxAuthors)
  .map(
  a -> Author
- .newInstance(a.getFullname(), a.getName(), a.getSurname(), a.getRank(), asPid(a.getPid())))
+ .newInstance(
+ StringUtils.left(a.getFullname(), ModelHardLimits.MAX_AUTHOR_FULLNAME_LENGTH),
+ a.getName(),
+ a.getSurname(),
+ a.getRank(), asPid(a.getPid())))
  .collect(Collectors.toList()))
  .orElse(null);
  }

View File

@ -937,7 +937,7 @@
  <commons.logging.version>1.1.3</commons.logging.version>
  <commons-validator.version>1.7</commons-validator.version>
  <dateparser.version>1.0.7</dateparser.version>
- <dhp-schemas.version>[8.0.1]</dhp-schemas.version>
+ <dhp-schemas.version>[9.0.0]</dhp-schemas.version>
  <dhp.cdh.version>cdh5.9.2</dhp.cdh.version>
  <dhp.commons.lang.version>3.5</dhp.commons.lang.version>
  <dhp.guava.version>11.0.2</dhp.guava.version>