From f0ea2410e5ef3250768ad6d78a24ea8c6aefabd3 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 21 Jan 2022 10:50:34 +0100 Subject: [PATCH] improved mapping of titles from datacite records to consider title types --- .../dhp/oa/graph/raw/OdfToOafMapper.java | 27 ++++++++++++++++--- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 20 ++++++++++++++ .../dnetlib/dhp/oa/graph/raw/odf_dataset.xml | 3 ++- 3 files changed, 45 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 194715295..639c1ab30 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty; import java.io.UnsupportedEncodingException; import java.net.URLDecoder; @@ -11,8 +12,11 @@ import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; +import org.dom4j.Element; import org.dom4j.Node; +import com.google.common.collect.Lists; + import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.schema.oaf.*; @@ -34,10 +38,25 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected List prepareTitles(final Document doc, final DataInfo info) { - return prepareListStructProps( - doc, - "//*[local-name()='titles']/*[local-name()='title']|//*[local-name()='resource']/*[local-name()='title']", - MAIN_TITLE_QUALIFIER, info); + + final List title = Lists.newArrayList(); + final String xpath = 
"//*[local-name()='titles']/*[local-name()='title']|//*[local-name()='resource']/*[local-name()='title']"; + + for (Object o : doc.selectNodes(xpath)) { + Element e = (Element) o; + final String titleValue = e.getTextTrim(); + final String titleType = e.attributeValue("titleType"); + if (StringUtils.isNotBlank(titleType)) { + title + .add( + structuredProperty( + titleValue, titleType, titleType, DNET_DATACITE_TITLE, DNET_DATACITE_TITLE, info)); + } else { + title.add(structuredProperty(titleValue, MAIN_TITLE_QUALIFIER, info)); + } + } + + return title; } @Override diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index de79b750a..6a3414b7c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -10,6 +10,7 @@ import java.io.IOException; import java.util.List; import java.util.Objects; import java.util.Optional; +import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; @@ -355,6 +356,25 @@ class MappersTest { assertTrue(r2.getValidated()); assertEquals("2020-01-01", r1.getValidationDate()); assertEquals("2020-01-01", r2.getValidationDate()); + + assertNotNull(d.getTitle()); + assertEquals(2, d.getTitle().size()); + verifyTitle(d, "main title", "Temperature and ADCP data collected on Lake Geneva between 2015 and 2017"); + verifyTitle(d, "Subtitle", "survey"); + } + + private void verifyTitle(Dataset d, String titleType, String title) { + Optional + .of( + d + .getTitle() + .stream() + .filter(t -> titleType.equals(t.getQualifier().getClassid())) + .collect(Collectors.toList())) + .ifPresent(t -> { + assertEquals(1, t.size()); + assertEquals(title, t.get(0).getValue()); + }); } @Test diff --git 
a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_dataset.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_dataset.xml index 31de2e45b..4f41ee6ea 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_dataset.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_dataset.xml @@ -54,7 +54,8 @@ - Temperature and ADCP data collected on Lake Geneva between 2015 and 2017 + Temperature and ADCP data collected on Lake Geneva between 2015 and 2017 + survey Zenodo 2019