From cfc8948717b4f5da506af7fc3c0a3d230ecb4c69 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 15 May 2020 12:26:16 +0200 Subject: [PATCH] fixed mapping OdfToGraph: pick the correct element to map author pids and author affiliations; extended mapping Oaf2Graph: added support for author pids --- .../dhp/oa/graph/raw/OafToOafMapper.java | 20 +++++-- .../dhp/oa/graph/raw/OdfToOafMapper.java | 8 +-- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 55 +++++++++++++++++-- .../dnetlib/dhp/oa/graph/raw/oaf_record.xml | 2 +- .../dnetlib/dhp/oa/graph/raw/odf_dataset.xml | 3 +- 5 files changed, 71 insertions(+), 17 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index 891fee57e..6b6aa15e8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -1,15 +1,16 @@ package eu.dnetlib.dhp.oa.graph.raw; -import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId; -import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field; +import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.*; import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import java.util.*; import java.util.stream.Collectors; +import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; +import org.dom4j.Element; import org.dom4j.Node; import com.google.common.collect.Lists; @@ -28,15 +29,24 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { final List res = new ArrayList<>(); int pos = 1; for (final Object o : doc.selectNodes("//dc:creator")) { - final Node n = (Node) o; + final Element e = (Element) o; final Author author = new Author(); - author.setFullname(n.getText()); + author.setFullname(e.getText()); author.setRank(pos++); - final PacePerson p = new PacePerson(n.getText(), false); + final PacePerson p = new PacePerson(e.getText(), false); if (p.isAccurate()) { author.setName(p.getNormalisedFirstName()); author.setSurname(p.getNormalisedSurname()); } + + final String pid = e.attributeValue("nameIdentifier"); + final String pidType = e.attributeValue("nameIdentifierScheme"); + + if (StringUtils.isNotBlank(pid) && StringUtils.isNotBlank(pidType)) { + author.setPid(new ArrayList<>()); + author.getPid().add(structuredProperty(pid, qualifier(pidType, pidType, DNET_PID_TYPES, DNET_PID_TYPES), info)); + } + res.add(author); } return res; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 5baac12fd..30b980c42 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -63,17 +63,17 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { author.setSurname(surname); } - author.setAffiliation(prepareListFields(doc, "./datacite:affiliation", info)); - author.setPid(preparePids(doc, info)); + author.setAffiliation(prepareListFields(n, "./datacite:affiliation", info)); + author.setPid(preparePids(n, info)); author.setRank(pos++); res.add(author); } return res; } - private List preparePids(final Document doc, final DataInfo info) { + private List preparePids(final Node n, final DataInfo info) { final List res = new ArrayList<>(); - for (final Object o : doc.selectNodes("./datacite:nameIdentifier")) { + for (final Object o : n.selectNodes("./datacite:nameIdentifier")) { res .add( structuredProperty( diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 951c97d9d..d7635c9ea 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -10,7 +10,10 @@ import static org.mockito.Mockito.when; import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.Optional; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.junit.jupiter.api.BeforeEach; @@ -19,12 +22,6 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; -import eu.dnetlib.dhp.schema.oaf.Dataset; -import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.schema.oaf.Software; - @ExtendWith(MockitoExtension.class) public class MappersTest { @@ -54,7 +51,26 @@ public class MappersTest { assertValidId(p.getId()); assertValidId(p.getCollectedfrom().get(0).getKey()); assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); + assertTrue(p.getAuthor().size() > 0); + Optional author = p.getAuthor() + .stream() + .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) + .findFirst(); + assertTrue(author.isPresent()); + StructuredProperty pid = author.get().getPid() + .stream() + .findFirst() + .get(); + assertEquals("0000-0001-6651-1178", pid.getValue()); + assertEquals("ORCID", pid.getQualifier().getClassid()); + assertEquals("ORCID", pid.getQualifier().getClassname()); + assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid()); + assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename()); + assertEquals("Votsi,Nefta", author.get().getFullname()); + assertEquals("Votsi", author.get().getSurname()); + assertEquals("Nefta", author.get().getName()); + assertTrue(p.getSubject().size() > 0); assertTrue(StringUtils.isNotBlank(p.getJournal().getIssnOnline())); assertTrue(StringUtils.isNotBlank(p.getJournal().getName())); @@ -100,6 +116,33 @@ public class MappersTest { assertValidId(d.getCollectedfrom().get(0).getKey()); assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue())); assertTrue(d.getAuthor().size() > 0); + + Optional author = d.getAuthor() + .stream() + .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) + .findFirst(); + assertTrue(author.isPresent()); + StructuredProperty pid = author.get().getPid() + .stream() + .findFirst() + .get(); + assertEquals("0000-0001-9074-1619", pid.getValue()); + assertEquals("ORCID", pid.getQualifier().getClassid()); + assertEquals("ORCID", pid.getQualifier().getClassname()); + assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid()); + assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename()); + assertEquals("Baracchini, Theo", author.get().getFullname()); + assertEquals("Baracchini", author.get().getSurname()); + assertEquals("Theo", author.get().getName()); + + assertEquals(1, author.get().getAffiliation().size()); + Optional> opAff = author.get().getAffiliation() + .stream() + .findFirst(); + assertTrue(opAff.isPresent()); + Field affiliation = opAff.get(); + assertEquals("ISTI-CNR", affiliation.getValue()); + assertTrue(d.getSubject().size() > 0); assertTrue(d.getInstance().size() > 0); assertTrue(d.getContext().size() > 0); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_record.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_record.xml index e898d4434..2cb0ba1c7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_record.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_record.xml @@ -19,7 +19,7 @@ Ecosystem Service capacity is higher in areas of multiple designation types Nikolaidou,Charitini - Votsi,Nefta + Votsi,Nefta Sgardelis,Steanos Halley,John Pantis,John diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_dataset.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_dataset.xml index 94dc802fa..88ae9d106 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_dataset.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_dataset.xml @@ -35,9 +35,10 @@ Baracchini, Theo + 0000-0001-9074-1619 Theo Baracchini - Physics of Aquatic Systems Laboratory (APHYS) – Margaretha Kamprad Chair, ENAC, EPFL, Lausanne, 1015, Switzerland + ISTI-CNR Wüest, Alfred